52 files changed, 7766 insertions, 5265 deletions
diff --git a/Objects/abstract.c b/Objects/abstract.c
index 124e766..5e96138 100644
--- a/Objects/abstract.c
+++ b/Objects/abstract.c
@@ -64,49 +64,72 @@ PyObject_Length(PyObject *o)
 }
 #define PyObject_Length PyObject_Size
 
+int
+_PyObject_HasLen(PyObject *o) {
+    return (Py_TYPE(o)->tp_as_sequence && Py_TYPE(o)->tp_as_sequence->sq_length) ||
+        (Py_TYPE(o)->tp_as_mapping && Py_TYPE(o)->tp_as_mapping->mp_length);
+}
 
 /* The length hint function returns a non-negative value from o.__len__()
-   or o.__length_hint__().  If those methods aren't found or return a negative
-   value, then the defaultvalue is returned.  If one of the calls fails,
+   or o.__length_hint__().  If those methods aren't found, the defaultvalue is
+   returned.  If one of the calls fails with an exception other than TypeError,
    this function returns -1.
 */
 
 Py_ssize_t
-_PyObject_LengthHint(PyObject *o, Py_ssize_t defaultvalue)
+PyObject_LengthHint(PyObject *o, Py_ssize_t defaultvalue)
 {
+    PyObject *hint, *result;
+    Py_ssize_t res;
     _Py_IDENTIFIER(__length_hint__);
-    PyObject *ro, *hintmeth;
-    Py_ssize_t rv;
-
-    /* try o.__len__() */
-    rv = PyObject_Size(o);
-    if (rv >= 0)
-        return rv;
-    if (PyErr_Occurred()) {
-        if (!PyErr_ExceptionMatches(PyExc_TypeError))
-            return -1;
-        PyErr_Clear();
+    if (_PyObject_HasLen(o)) {
+        res = PyObject_Length(o);
+        if (res < 0 && PyErr_Occurred()) {
+            if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
+                return -1;
+            }
+            PyErr_Clear();
+        }
+        else {
+            return res;
+        }
     }
-
-    /* try o.__length_hint__() */
-    hintmeth = _PyObject_LookupSpecial(o, &PyId___length_hint__);
-    if (hintmeth == NULL) {
-        if (PyErr_Occurred())
+    hint = _PyObject_LookupSpecial(o, &PyId___length_hint__);
+    if (hint == NULL) {
+        if (PyErr_Occurred()) {
             return -1;
-        else
+        }
+        return defaultvalue;
+    }
+    result = PyObject_CallFunctionObjArgs(hint, NULL);
+    Py_DECREF(hint);
+    if (result == NULL) {
+        if (PyErr_ExceptionMatches(PyExc_TypeError)) {
+            PyErr_Clear();
             return defaultvalue;
+        }
+        return -1;
     }
-    ro = PyObject_CallFunctionObjArgs(hintmeth, NULL);
-    Py_DECREF(hintmeth);
-    if (ro == NULL) {
-        if (!PyErr_ExceptionMatches(PyExc_TypeError))
-            return -1;
-        PyErr_Clear();
+    else if (result == Py_NotImplemented) {
+        Py_DECREF(result);
         return defaultvalue;
     }
-    rv = PyLong_Check(ro) ? PyLong_AsSsize_t(ro) : defaultvalue;
-    Py_DECREF(ro);
-    return rv;
+    if (!PyLong_Check(result)) {
+        PyErr_Format(PyExc_TypeError, "__length_hint__ must be an integer, not %.100s",
+            Py_TYPE(result)->tp_name);
+        Py_DECREF(result);
+        return -1;
+    }
+    res = PyLong_AsSsize_t(result);
+    Py_DECREF(result);
+    if (res < 0 && PyErr_Occurred()) {
+        return -1;
+    }
+    if (res < 0) {
+        PyErr_Format(PyExc_ValueError, "__length_hint__() should return >= 0");
+        return -1;
+    }
+    return res;
 }
 
 PyObject *
@@ -202,7 +225,7 @@ PyObject_DelItem(PyObject *o, PyObject *key)
 }
 
 int
-PyObject_DelItemString(PyObject *o, char *key)
+PyObject_DelItemString(PyObject *o, const char *key)
 {
     PyObject *okey;
     int ret;
@@ -227,28 +250,7 @@ PyObject_AsCharBuffer(PyObject *obj,
                       const char **buffer,
                       Py_ssize_t *buffer_len)
 {
-    PyBufferProcs *pb;
-    Py_buffer view;
-
-    if (obj == NULL || buffer == NULL || buffer_len == NULL) {
-        null_error();
-        return -1;
-    }
-    pb = obj->ob_type->tp_as_buffer;
-    if (pb == NULL || pb->bf_getbuffer == NULL) {
-        PyErr_SetString(PyExc_TypeError,
-                        "expected bytes, bytearray "
-                        "or buffer compatible object");
-        return -1;
-    }
-    if ((*pb->bf_getbuffer)(obj, &view, PyBUF_SIMPLE)) return -1;
-
-    *buffer = view.buf;
-    *buffer_len = view.len;
-    if (pb->bf_releasebuffer != NULL)
-        (*pb->bf_releasebuffer)(obj, &view);
-    Py_XDECREF(view.obj);
-    return 0;
+    return PyObject_AsReadBuffer(obj, (const void **)buffer, buffer_len);
 }
 
 int
@@ -272,28 +274,18 @@ int PyObject_AsReadBuffer(PyObject *obj,
                           const void **buffer,
                           Py_ssize_t *buffer_len)
 {
-    PyBufferProcs *pb;
     Py_buffer view;
 
     if (obj == NULL || buffer == NULL || buffer_len == NULL) {
         null_error();
         return -1;
     }
-    pb = obj->ob_type->tp_as_buffer;
-    if (pb == NULL ||
-        pb->bf_getbuffer == NULL) {
-        PyErr_SetString(PyExc_TypeError,
-                        "expected an object with a buffer interface");
+    if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
         return -1;
-    }
-
-    if ((*pb->bf_getbuffer)(obj, &view, PyBUF_SIMPLE)) return -1;
 
     *buffer = view.buf;
     *buffer_len = view.len;
-    if (pb->bf_releasebuffer != NULL)
-        (*pb->bf_releasebuffer)(obj, &view);
-    Py_XDECREF(view.obj);
+    PyBuffer_Release(&view);
     return 0;
 }
 
@@ -319,9 +311,7 @@ int PyObject_AsWriteBuffer(PyObject *obj,
 
     *buffer = view.buf;
     *buffer_len = view.len;
-    if (pb->bf_releasebuffer != NULL)
-        (*pb->bf_releasebuffer)(obj, &view);
-    Py_XDECREF(view.obj);
+    PyBuffer_Release(&view);
     return 0;
 }
 
@@ -330,13 +320,15 @@ int PyObject_AsWriteBuffer(PyObject *obj,
 int
 PyObject_GetBuffer(PyObject *obj, Py_buffer *view, int flags)
 {
-    if (!PyObject_CheckBuffer(obj)) {
+    PyBufferProcs *pb = obj->ob_type->tp_as_buffer;
+
+    if (pb == NULL || pb->bf_getbuffer == NULL) {
         PyErr_Format(PyExc_TypeError,
                      "'%.100s' does not support the buffer interface",
                      Py_TYPE(obj)->tp_name);
         return -1;
     }
-    return (*(obj->ob_type->tp_as_buffer->bf_getbuffer))(obj, view, flags);
+    return (*pb->bf_getbuffer)(obj, view, flags);
 }
 
 static int
@@ -465,7 +457,7 @@ PyBuffer_FromContiguous(Py_buffer *view, void *buf, Py_ssize_t len, char fort)
 
     /* Otherwise a more elaborate scheme is needed */
 
-    /* XXX(nnorwitz): need to check for overflow! */
+    /* view->ndim <= 64, so the size computation below cannot overflow */
     indices = (Py_ssize_t *)PyMem_Malloc(sizeof(Py_ssize_t)*(view->ndim));
     if (indices == NULL) {
         PyErr_NoMemory();
@@ -487,10 +479,10 @@ PyBuffer_FromContiguous(Py_buffer *view, void *buf, Py_ssize_t len, char fort)
      */
     elements = len / view->itemsize;
     while (elements--) {
-        addone(view->ndim, indices, view->shape);
         ptr = PyBuffer_GetPointer(view, indices);
         memcpy(ptr, src, view->itemsize);
         src += view->itemsize;
+        addone(view->ndim, indices, view->shape);
     }
 
     PyMem_Free(indices);
@@ -629,10 +621,14 @@ void
 PyBuffer_Release(Py_buffer *view)
 {
     PyObject *obj = view->obj;
-    if (obj && Py_TYPE(obj)->tp_as_buffer && Py_TYPE(obj)->tp_as_buffer->bf_releasebuffer)
-        Py_TYPE(obj)->tp_as_buffer->bf_releasebuffer(obj, view);
-    Py_XDECREF(obj);
+    PyBufferProcs *pb;
+    if (obj == NULL)
+        return;
+    pb = Py_TYPE(obj)->tp_as_buffer;
+    if (pb && pb->bf_releasebuffer)
+        pb->bf_releasebuffer(obj, view);
     view->obj = NULL;
+    Py_DECREF(obj);
 }
 
 PyObject *
@@ -1162,6 +1158,15 @@ PyNumber_Index(PyObject *item)
         Py_DECREF(result);
         return NULL;
     }
+    /* Issue #17576: warn if 'result' is not of exact type int. */
+    if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+            "__index__ returned non-int (type %.200s).  "
+            "The ability to return an instance of a strict subclass of int "
+            "is deprecated, and may be removed in a future version of Python.",
+            result->ob_type->tp_name)) {
+        Py_DECREF(result);
+        return NULL;
+    }
     return result;
 }
 
@@ -1217,8 +1222,7 @@ PyNumber_Long(PyObject *o)
 {
     PyNumberMethods *m;
     PyObject *trunc_func;
-    const char *buffer;
-    Py_ssize_t buffer_len;
+    Py_buffer view;
     _Py_IDENTIFIER(__trunc__);
 
     if (o == NULL)
@@ -1239,7 +1243,7 @@ PyNumber_Long(PyObject *o)
         if (truncated == NULL || PyLong_Check(truncated))
             return truncated;
         /* __trunc__ is specified to return an Integral type,
-           but int() needs to return a int. */
+           but int() needs to return an int. */
         m = truncated->ob_type->tp_as_number;
         if (m == NULL || m->nb_int == NULL) {
             PyErr_Format(
@@ -1256,6 +1260,10 @@ PyNumber_Long(PyObject *o)
     if (PyErr_Occurred())
         return NULL;
 
+    if (PyUnicode_Check(o))
+        /* The check described below is done in PyLong_FromUnicodeObject(). */
+        return PyLong_FromUnicodeObject(o, 10);
+
     if (PyBytes_Check(o))
         /* need to do extra error checking that PyLong_FromString()
          * doesn't do.  In particular int('9\x005') must raise an
@@ -1263,14 +1271,29 @@ PyNumber_Long(PyObject *o)
          */
         return _PyLong_FromBytes(PyBytes_AS_STRING(o),
                                  PyBytes_GET_SIZE(o), 10);
-    if (PyUnicode_Check(o))
-        /* The above check is done in PyLong_FromUnicode(). */
-        return PyLong_FromUnicodeObject(o, 10);
-    if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len))
-        return _PyLong_FromBytes(buffer, buffer_len, 10);
 
-    return type_error("int() argument must be a string or a "
-                      "number, not '%.200s'", o);
+    if (PyByteArray_Check(o))
+        return _PyLong_FromBytes(PyByteArray_AS_STRING(o),
+                                 PyByteArray_GET_SIZE(o), 10);
+
+    if (PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) == 0) {
+        PyObject *result, *bytes;
+
+        /* Copy to NUL-terminated buffer. */
+        bytes = PyBytes_FromStringAndSize((const char *)view.buf, view.len);
+        if (bytes == NULL) {
+            PyBuffer_Release(&view);
+            return NULL;
+        }
+        result = _PyLong_FromBytes(PyBytes_AS_STRING(bytes),
+                                   PyBytes_GET_SIZE(bytes), 10);
+        Py_DECREF(bytes);
+        PyBuffer_Release(&view);
+        return result;
+    }
+
+    return type_error("int() argument must be a string, a bytes-like object "
+                      "or a number, not '%.200s'", o);
 }
 
 PyObject *
@@ -1631,7 +1654,7 @@ PySequence_Tuple(PyObject *v)
         Py_INCREF(v);
         return v;
     }
-    if (PyList_Check(v))
+    if (PyList_CheckExact(v))
         return PyList_AsTuple(v);
 
     /* Get iterator. */
@@ -1640,7 +1663,7 @@ PySequence_Tuple(PyObject *v)
         return NULL;
 
     /* Guess result size and allocate space. */
-    n = _PyObject_LengthHint(v, 10);
+    n = PyObject_LengthHint(v, 10);
     if (n == -1)
         goto Fail;
     result = PyTuple_New(n);
@@ -1900,7 +1923,7 @@ PyMapping_Length(PyObject *o)
 #define PyMapping_Length PyMapping_Size
 
 PyObject *
-PyMapping_GetItemString(PyObject *o, char *key)
+PyMapping_GetItemString(PyObject *o, const char *key)
 {
     PyObject *okey, *r;
 
@@ -1916,7 +1939,7 @@ PyMapping_GetItemString(PyObject *o, char *key)
 }
 
 int
-PyMapping_SetItemString(PyObject *o, char *key, PyObject *value)
+PyMapping_SetItemString(PyObject *o, const char *key, PyObject *value)
 {
     PyObject *okey;
     int r;
@@ -1935,7 +1958,7 @@ PyMapping_SetItemString(PyObject *o, char *key, PyObject *value)
 }
 
 int
-PyMapping_HasKeyString(PyObject *o, char *key)
+PyMapping_HasKeyString(PyObject *o, const char *key)
 {
     PyObject *v;
 
@@ -2034,10 +2057,16 @@ PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw)
             return NULL;
         result = (*call)(func, arg, kw);
         Py_LeaveRecursiveCall();
-        if (result == NULL && !PyErr_Occurred())
+#ifdef NDEBUG
+        if (result == NULL && !PyErr_Occurred()) {
             PyErr_SetString(
                 PyExc_SystemError,
                 "NULL result without error in PyObject_Call");
+        }
+#else
+        assert((result != NULL && !PyErr_Occurred())
+                || (result == NULL && PyErr_Occurred()));
+#endif
         return result;
     }
     PyErr_Format(PyExc_TypeError, "'%.200s' object is not callable",
@@ -2072,7 +2101,7 @@ call_function_tail(PyObject *callable, PyObject *args)
 }
 
 PyObject *
-PyObject_CallFunction(PyObject *callable, char *format, ...)
+PyObject_CallFunction(PyObject *callable, const char *format, ...)
 {
     va_list va;
     PyObject *args;
@@ -2087,12 +2116,14 @@ PyObject_CallFunction(PyObject *callable, char *format, ...)
     }
     else
         args = PyTuple_New(0);
+    if (args == NULL)
+        return NULL;
 
     return call_function_tail(callable, args);
 }
 
 PyObject *
-_PyObject_CallFunction_SizeT(PyObject *callable, char *format, ...)
+_PyObject_CallFunction_SizeT(PyObject *callable, const char *format, ...)
 {
     va_list va;
     PyObject *args;
@@ -2112,7 +2143,7 @@ _PyObject_CallFunction_SizeT(PyObject *callable, char *format, ...)
 }
 
 static PyObject*
-callmethod(PyObject* func, char *format, va_list va, int is_size_t)
+callmethod(PyObject* func, const char *format, va_list va, int is_size_t)
 {
     PyObject *retval = NULL;
     PyObject *args;
@@ -2141,7 +2172,7 @@ callmethod(PyObject* func, char *format, va_list va, int is_size_t)
 }
 
 PyObject *
-PyObject_CallMethod(PyObject *o, char *name, char *format, ...)
+PyObject_CallMethod(PyObject *o, const char *name, const char *format, ...)
 {
     va_list va;
     PyObject *func = NULL;
@@ -2151,9 +2182,8 @@ PyObject_CallMethod(PyObject *o, char *name, char *format, ...)
         return null_error();
 
     func = PyObject_GetAttrString(o, name);
-    if (func == NULL) {
-        return 0;
-    }
+    if (func == NULL)
+        return NULL;
 
     va_start(va, format);
     retval = callmethod(func, format, va, 0);
@@ -2162,7 +2192,8 @@ PyObject_CallMethod(PyObject *o, char *name, char *format, ...)
 }
 
 PyObject *
-_PyObject_CallMethodId(PyObject *o, _Py_Identifier *name, char *format, ...)
+_PyObject_CallMethodId(PyObject *o, _Py_Identifier *name,
+                       const char *format, ...)
 {
     va_list va;
     PyObject *func = NULL;
@@ -2172,9 +2203,8 @@ _PyObject_CallMethodId(PyObject *o, _Py_Identifier *name, char *format, ...)
         return null_error();
 
     func = _PyObject_GetAttrId(o, name);
-    if (func == NULL) {
-        return 0;
-    }
+    if (func == NULL)
+        return NULL;
 
     va_start(va, format);
     retval = callmethod(func, format, va, 0);
@@ -2183,7 +2213,8 @@ _PyObject_CallMethodId(PyObject *o, _Py_Identifier *name, char *format, ...)
 }
 
 PyObject *
-_PyObject_CallMethod_SizeT(PyObject *o, char *name, char *format, ...)
+_PyObject_CallMethod_SizeT(PyObject *o, const char *name,
+                           const char *format, ...)
 {
     va_list va;
     PyObject *func = NULL;
@@ -2193,9 +2224,8 @@ _PyObject_CallMethod_SizeT(PyObject *o, char *name, char *format, ...)
         return null_error();
 
     func = PyObject_GetAttrString(o, name);
-    if (func == NULL) {
-        return 0;
-    }
+    if (func == NULL)
+        return NULL;
     va_start(va, format);
     retval = callmethod(func, format, va, 1);
     va_end(va);
@@ -2203,7 +2233,8 @@ _PyObject_CallMethod_SizeT(PyObject *o, char *name, char *format, ...)
 }
 
 PyObject *
-_PyObject_CallMethodId_SizeT(PyObject *o, _Py_Identifier *name, char *format, ...)
+_PyObject_CallMethodId_SizeT(PyObject *o, _Py_Identifier *name,
+                             const char *format, ...)
 {
     va_list va;
     PyObject *func = NULL;
@@ -2273,7 +2304,7 @@ PyObject_CallMethodObjArgs(PyObject *callable, PyObject *name, ...)
 }
 
 PyObject *
-_PyObject_CallMethodObjIdArgs(PyObject *callable,
+_PyObject_CallMethodIdObjArgs(PyObject *callable,
         struct _Py_Identifier *name, ...)
 {
     PyObject *args, *tmp;
@@ -2648,8 +2679,8 @@ PyIter_Next(PyObject *iter)
  * NULL terminated string pointers with a NULL char* terminating the array.
  * (ie: an argv or env list)
  *
- * Memory allocated for the returned list is allocated using malloc() and MUST
- * be freed by the caller using a free() loop or _Py_FreeCharPArray().
+ * Memory for the returned list is allocated using PyMem_Malloc()
+ * and MUST be freed by the caller using _Py_FreeCharPArray().
  */
 char *const *
 _PySequence_BytesToCharpArray(PyObject* self)
@@ -2657,6 +2688,7 @@ _PySequence_BytesToCharpArray(PyObject* self)
     char **array;
     Py_ssize_t i, argc;
     PyObject *item = NULL;
+    Py_ssize_t size;
 
     argc = PySequence_Size(self);
     if (argc == -1)
@@ -2669,7 +2701,7 @@ _PySequence_BytesToCharpArray(PyObject* self)
         return NULL;
     }
 
-    array = malloc((argc + 1) * sizeof(char *));
+    array = PyMem_Malloc((argc + 1) * sizeof(char *));
     if (array == NULL) {
         PyErr_NoMemory();
         return NULL;
@@ -2688,11 +2720,13 @@ _PySequence_BytesToCharpArray(PyObject* self)
             array[i] = NULL;
             goto fail;
         }
-        array[i] = strdup(data);
+        size = PyBytes_GET_SIZE(item) + 1;
+        array[i] = PyMem_Malloc(size);
         if (!array[i]) {
             PyErr_NoMemory();
             goto fail;
         }
+        memcpy(array[i], data, size);
         Py_DECREF(item);
     }
     array[argc] = NULL;
@@ -2712,7 +2746,7 @@ _Py_FreeCharPArray(char *const array[])
 {
     Py_ssize_t i;
     for (i = 0; array[i] != NULL; ++i) {
-        free(array[i]);
+        PyMem_Free(array[i]);
     }
-    free((void*)array);
+    PyMem_Free((void*)array);
 }
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index 50667a6..15c525c 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -74,24 +74,6 @@ bytearray_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
     obj->ob_exports--;
 }
 
-static Py_ssize_t
-_getbuffer(PyObject *obj, Py_buffer *view)
-{
-    PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
-
-    if (buffer == NULL || buffer->bf_getbuffer == NULL)
-    {
-        PyErr_Format(PyExc_TypeError,
-                     "Type %.100s doesn't support the buffer API",
-                     Py_TYPE(obj)->tp_name);
-        return -1;
-    }
-
-    if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
-            return -1;
-    return view->len;
-}
-
 static int
 _canresize(PyByteArrayObject *self)
 {
@@ -150,6 +132,7 @@ PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
     }
     Py_SIZE(new) = size;
     new->ob_alloc = alloc;
+    new->ob_start = new->ob_bytes;
     new->ob_exports = 0;
 
     return (PyObject *)new;
@@ -174,51 +157,80 @@ PyByteArray_AsString(PyObject *self)
 }
 
 int
-PyByteArray_Resize(PyObject *self, Py_ssize_t size)
+PyByteArray_Resize(PyObject *self, Py_ssize_t requested_size)
 {
     void *sval;
-    Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
+    PyByteArrayObject *obj = ((PyByteArrayObject *)self);
+    /* All computations are done unsigned to avoid integer overflows
+       (see issue #22335). */
+    size_t alloc = (size_t) obj->ob_alloc;
+    size_t logical_offset = (size_t) (obj->ob_start - obj->ob_bytes);
+    size_t size = (size_t) requested_size;
 
     assert(self != NULL);
     assert(PyByteArray_Check(self));
-    assert(size >= 0);
+    assert(logical_offset <= alloc);
+    assert(requested_size >= 0);
 
-    if (size == Py_SIZE(self)) {
+    if (requested_size == Py_SIZE(self)) {
         return 0;
     }
-    if (!_canresize((PyByteArrayObject *)self)) {
+    if (!_canresize(obj)) {
         return -1;
     }
 
-    if (size < alloc / 2) {
-        /* Major downsize; resize down to exact size */
-        alloc = size + 1;
-    }
-    else if (size < alloc) {
-        /* Within allocated size; quick exit */
-        Py_SIZE(self) = size;
-        ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
-        return 0;
-    }
-    else if (size <= alloc * 1.125) {
-        /* Moderate upsize; overallocate similar to list_resize() */
-        alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
+    if (size + logical_offset + 1 <= alloc) {
+        /* Current buffer is large enough to host the requested size,
+           decide on a strategy. */
+        if (size < alloc / 2) {
+            /* Major downsize; resize down to exact size */
+            alloc = size + 1;
+        }
+        else {
+            /* Fits in the current allocation; quick exit */
+            Py_SIZE(self) = size;
+            PyByteArray_AS_STRING(self)[size] = '\0'; /* Trailing null */
+            return 0;
+        }
     }
     else {
-        /* Major upsize; resize up to exact size */
-        alloc = size + 1;
+        /* Need growing, decide on a strategy */
+        if (size <= alloc * 1.125) {
+            /* Moderate upsize; overallocate similar to list_resize() */
+            alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
+        }
+        else {
+            /* Major upsize; resize up to exact size */
+            alloc = size + 1;
+        }
     }
-
-    sval = PyObject_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
-    if (sval == NULL) {
+    if (alloc > PY_SSIZE_T_MAX) {
         PyErr_NoMemory();
         return -1;
     }
 
-    ((PyByteArrayObject *)self)->ob_bytes = sval;
+    if (logical_offset > 0) {
+        sval = PyObject_Malloc(alloc);
+        if (sval == NULL) {
+            PyErr_NoMemory();
+            return -1;
+        }
+        memcpy(sval, PyByteArray_AS_STRING(self),
+               Py_MIN(requested_size, Py_SIZE(self)));
+        PyObject_Free(obj->ob_bytes);
+    }
+    else {
+        sval = PyObject_Realloc(obj->ob_bytes, alloc);
+        if (sval == NULL) {
+            PyErr_NoMemory();
+            return -1;
+        }
+    }
+
+    obj->ob_bytes = obj->ob_start = sval;
     Py_SIZE(self) = size;
-    ((PyByteArrayObject *)self)->ob_alloc = alloc;
-    ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
+    obj->ob_alloc = alloc;
+    obj->ob_bytes[size] = '\0'; /* Trailing null byte */
 
     return 0;
 }
@@ -232,8 +244,8 @@ PyByteArray_Concat(PyObject *a, PyObject *b)
 
     va.len = -1;
     vb.len = -1;
-    if (_getbuffer(a, &va) < 0  ||
-        _getbuffer(b, &vb) < 0) {
+    if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
+        PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
             PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
                          Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
             goto done;
@@ -274,7 +286,7 @@ bytearray_iconcat(PyByteArrayObject *self, PyObject *other)
     Py_ssize_t size;
     Py_buffer vo;
 
-    if (_getbuffer(other, &vo) < 0) {
+    if (PyObject_GetBuffer(other, &vo, PyBUF_SIMPLE) != 0) {
         PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
                      Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
         return NULL;
@@ -286,15 +298,11 @@ bytearray_iconcat(PyByteArrayObject *self, PyObject *other)
         PyBuffer_Release(&vo);
         return PyErr_NoMemory();
     }
-    if (size < self->ob_alloc) {
-        Py_SIZE(self) = size;
-        self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
-    }
-    else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
+    if (PyByteArray_Resize((PyObject *)self, size) < 0) {
         PyBuffer_Release(&vo);
         return NULL;
     }
-    memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
+    memcpy(PyByteArray_AS_STRING(self) + mysize, vo.buf, vo.len);
     PyBuffer_Release(&vo);
     Py_INCREF(self);
     return (PyObject *)self;
@@ -331,6 +339,7 @@ bytearray_irepeat(PyByteArrayObject *self, Py_ssize_t count)
 {
     Py_ssize_t mysize;
     Py_ssize_t size;
+    char *buf;
 
     if (count < 0)
         count = 0;
@@ -338,19 +347,16 @@ bytearray_irepeat(PyByteArrayObject *self, Py_ssize_t count)
     if (count > 0 && mysize > PY_SSIZE_T_MAX / count)
         return PyErr_NoMemory();
     size = mysize * count;
-    if (size < self->ob_alloc) {
-        Py_SIZE(self) = size;
-        self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
-    }
-    else if (PyByteArray_Resize((PyObject *)self, size) < 0)
+    if (PyByteArray_Resize((PyObject *)self, size) < 0)
         return NULL;
 
+    buf = PyByteArray_AS_STRING(self);
     if (mysize == 1)
-        memset(self->ob_bytes, self->ob_bytes[0], size);
+        memset(buf, buf[0], size);
     else {
         Py_ssize_t i;
         for (i = 1; i < count; i++)
-            memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
+            memcpy(buf + i*mysize, buf, mysize);
     }
 
     Py_INCREF(self);
@@ -366,7 +372,7 @@ bytearray_getitem(PyByteArrayObject *self, Py_ssize_t i)
         PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
         return NULL;
     }
-    return PyLong_FromLong((unsigned char)(self->ob_bytes[i]));
+    return PyLong_FromLong((unsigned char)(PyByteArray_AS_STRING(self)[i]));
 }
 
 static PyObject *
@@ -385,7 +391,7 @@ bytearray_subscript(PyByteArrayObject *self, PyObject *index)
             PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
             return NULL;
         }
-        return PyLong_FromLong((unsigned char)(self->ob_bytes[i]));
+        return PyLong_FromLong((unsigned char)(PyByteArray_AS_STRING(self)[i]));
     }
     else if (PySlice_Check(index)) {
         Py_ssize_t start, stop, step, slicelength, cur, i;
@@ -398,8 +404,8 @@ bytearray_subscript(PyByteArrayObject *self, PyObject *index)
         if (slicelength <= 0)
             return PyByteArray_FromStringAndSize("", 0);
         else if (step == 1) {
-            return PyByteArray_FromStringAndSize(self->ob_bytes + start,
-                                             slicelength);
+            return PyByteArray_FromStringAndSize(
+                PyByteArray_AS_STRING(self) + start, slicelength);
         }
         else {
             char *source_buf = PyByteArray_AS_STRING(self);
@@ -425,10 +431,94 @@ bytearray_subscript(PyByteArrayObject *self, PyObject *index)
 }
 
 static int
+bytearray_setslice_linear(PyByteArrayObject *self,
+                          Py_ssize_t lo, Py_ssize_t hi,
+                          char *bytes, Py_ssize_t bytes_len)
+{
+    Py_ssize_t avail = hi - lo;
+    char *buf = PyByteArray_AS_STRING(self);
+    Py_ssize_t growth = bytes_len - avail;
+    int res = 0;
+    assert(avail >= 0);
+
+    if (growth < 0) {
+        if (!_canresize(self))
+            return -1;
+
+        if (lo == 0) {
+            /* Shrink the buffer by advancing its logical start */
+            self->ob_start -= growth;
+            /*
+              0   lo               hi             old_size
+              |   |<----avail----->|<-----tail------>|
+              |      |<-bytes_len->|<-----tail------>|
+              0    new_lo         new_hi          new_size
+            */
+        }
+        else {
+            /*
+              0   lo               hi               old_size
+              |   |<----avail----->|<-----tomove------>|
+              |   |<-bytes_len->|<-----tomove------>|
+              0   lo         new_hi              new_size
+            */
+            memmove(buf + lo + bytes_len, buf + hi,
+                    Py_SIZE(self) - hi);
+        }
+        if (PyByteArray_Resize((PyObject *)self,
+                               Py_SIZE(self) + growth) < 0) {
+            /* Issue #19578: Handling the memory allocation failure here is
+               tricky because the bytearray object has already been
+               modified. Depending on growth and lo, the behaviour is
+               different.
+
+               If growth < 0 and lo != 0, the operation is completed, but a
+               MemoryError is still raised and the memory block is not
+               shrunk. Otherwise, the bytearray is restored to its previous
+               state and a MemoryError is raised. */
+            if (lo == 0) {
+                self->ob_start += growth;
+                return -1;
+            }
+            /* memmove() removed bytes, so the bytearray object cannot be
+               restored to its previous state. */
+            Py_SIZE(self) += growth;
+            res = -1;
+        }
+        buf = PyByteArray_AS_STRING(self);
+    }
+    else if (growth > 0) {
+        if (Py_SIZE(self) > (Py_ssize_t)PY_SSIZE_T_MAX - growth) {
+            PyErr_NoMemory();
+            return -1;
+        }
+
+        if (PyByteArray_Resize((PyObject *)self,
+                               Py_SIZE(self) + growth) < 0) {
+            return -1;
+        }
+        buf = PyByteArray_AS_STRING(self);
+        /* Make room for the additional bytes */
+        /*
+          0   lo        hi               old_size
+          |   |<-avail->|<-----tomove------>|
+          |   |<---bytes_len-->|<-----tomove------>|
+          0   lo            new_hi              new_size
+         */
+        memmove(buf + lo + bytes_len, buf + hi,
+                Py_SIZE(self) - lo - bytes_len);
+    }
+
+    if (bytes_len > 0)
+        memcpy(buf + lo, bytes, bytes_len);
+    return res;
+}
+
+static int
 bytearray_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
                PyObject *values)
 {
-    Py_ssize_t avail, needed;
+    Py_ssize_t needed;
     void *bytes;
     Py_buffer vbytes;
     int res = 0;
@@ -450,14 +540,14 @@ bytearray_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
         needed = 0;
     }
     else {
-            if (_getbuffer(values, &vbytes) < 0) {
-                    PyErr_Format(PyExc_TypeError,
-                                 "can't set bytearray slice from %.100s",
-                                 Py_TYPE(values)->tp_name);
-                    return -1;
-            }
-            needed = vbytes.len;
-            bytes = vbytes.buf;
+        if (PyObject_GetBuffer(values, &vbytes, PyBUF_SIMPLE) != 0) {
+            PyErr_Format(PyExc_TypeError,
+                         "can't set bytearray slice from %.100s",
+                         Py_TYPE(values)->tp_name);
+            return -1;
+        }
+        needed = vbytes.len;
+        bytes = vbytes.buf;
     }
 
     if (lo < 0)
@@ -467,50 +557,9 @@ bytearray_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
     if (hi > Py_SIZE(self))
         hi = Py_SIZE(self);
 
-    avail = hi - lo;
-    if (avail < 0)
-        lo = hi = avail = 0;
-
-    if (avail != needed) {
-        if (avail > needed) {
-            if (!_canresize(self)) {
-                res = -1;
-                goto finish;
-            }
-            /*
-              0   lo               hi               old_size
-              |   |<----avail----->|<-----tomove------>|
-              |   |<-needed->|<-----tomove------>|
-              0   lo      new_hi              new_size
-            */
-            memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
-                    Py_SIZE(self) - hi);
-        }
-        /* XXX(nnorwitz): need to verify this can't overflow! */
-        if (PyByteArray_Resize((PyObject *)self,
-                           Py_SIZE(self) + needed - avail) < 0) {
-                res = -1;
-                goto finish;
-        }
-        if (avail < needed) {
-            /*
-              0   lo        hi               old_size
-              |   |<-avail->|<-----tomove------>|
-              |   |<----needed---->|<-----tomove------>|
-              0   lo            new_hi              new_size
-             */
-            memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
-                    Py_SIZE(self) - lo - needed);
-        }
-    }
-
-    if (needed > 0)
-        memcpy(self->ob_bytes + lo, bytes, needed);
-
-
- finish:
+    res = bytearray_setslice_linear(self, lo, hi, bytes, needed);
     if (vbytes.len != -1)
-            PyBuffer_Release(&vbytes);
+        PyBuffer_Release(&vbytes);
     return res;
 }
 
@@ -533,7 +582,7 @@ bytearray_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
     if (!_getbytevalue(value, &ival))
         return -1;
 
-    self->ob_bytes[i] = ival;
+    PyByteArray_AS_STRING(self)[i] = ival;
     return 0;
 }
 
@@ -541,7 +590,8 @@ static int
 bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *values)
 {
     Py_ssize_t start, stop, step, slicelen, needed;
-    char *bytes;
+    char *buf, *bytes;
+    buf = PyByteArray_AS_STRING(self);
 
     if (PyIndex_Check(index)) {
         Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
@@ -568,7 +618,7 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu
             int ival;
             if (!_getbytevalue(values, &ival))
                 return -1;
-            self->ob_bytes[i] = (char)ival;
+            buf[i] = (char)ival;
             return 0;
         }
     }
@@ -606,7 +656,7 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu
     }
     else {
         assert(PyByteArray_Check(values));
-        bytes = ((PyByteArrayObject *)values)->ob_bytes;
+        bytes = PyByteArray_AS_STRING(values);
         needed = Py_SIZE(values);
     }
     /* Make sure b[5:2] = ... inserts before 5, not before 2. */
@@ -614,38 +664,7 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu
         (step > 0 && start > stop))
         stop = start;
     if (step == 1) {
-        if (slicelen != needed) {
-            if (!_canresize(self))
-                return -1;
-            if (slicelen > needed) {
-                /*
-                  0   start           stop              old_size
-                  |   |<---slicelen--->|<-----tomove------>|
-                  |   |<-needed->|<-----tomove------>|
-                  0   lo      new_hi              new_size
-                */
-                memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
-                        Py_SIZE(self) - stop);
-            }
-            if (PyByteArray_Resize((PyObject *)self,
-                               Py_SIZE(self) + needed - slicelen) < 0)
-                return -1;
-            if (slicelen < needed) {
-                /*
-                  0   lo        hi               old_size
-                  |   |<-avail->|<-----tomove------>|
-                  |   |<----needed---->|<-----tomove------>|
-                  0   lo            new_hi              new_size
-                 */
-                memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
-                        Py_SIZE(self) - start - needed);
-            }
-        }
-
-        if (needed > 0)
-            memcpy(self->ob_bytes + start, bytes, needed);
-
-        return 0;
+        return bytearray_setslice_linear(self, start, stop, bytes, needed);
     }
     else {
         if (needed == 0) {
@@ -672,14 +691,14 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu
                 if (cur + step >= (size_t)PyByteArray_GET_SIZE(self))
                     lim = PyByteArray_GET_SIZE(self) - cur - 1;
 
-                memmove(self->ob_bytes + cur - i,
-                        self->ob_bytes + cur + 1, lim);
+                memmove(buf + cur - i,
+                        buf + cur + 1, lim);
             }
             /* Move the tail of the bytes, in one chunk */
             cur = start + (size_t)slicelen*step;
             if (cur < (size_t)PyByteArray_GET_SIZE(self)) {
-                memmove(self->ob_bytes + cur - slicelen,
-                        self->ob_bytes + cur,
+                memmove(buf + cur - slicelen,
+                        buf + cur,
                         PyByteArray_GET_SIZE(self) - cur);
             }
             if (PyByteArray_Resize((PyObject *)self,
@@ -701,7 +720,7 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu
                 return -1;
             }
             for (cur = start, i = 0; i < slicelen; cur += step, i++)
-                self->ob_bytes[cur] = bytes[i];
+                buf[cur] = bytes[i];
             return 0;
         }
     }
@@ -781,7 +800,7 @@ bytearray_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
         if (count > 0) {
             if (PyByteArray_Resize((PyObject *)self, count))
                 return -1;
-            memset(self->ob_bytes, 0, count);
+            memset(PyByteArray_AS_STRING(self), 0, count);
         }
         return 0;
     }
@@ -794,7 +813,8 @@ bytearray_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
             return -1;
         size = view.len;
         if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
-        if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
+        if (PyBuffer_ToContiguous(PyByteArray_AS_STRING(self),
+            &view, size, 'C') < 0)
             goto fail;
         PyBuffer_Release(&view);
         return 0;
@@ -834,11 +854,13 @@ bytearray_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
             goto error;
 
         /* Append the byte */
-        if (Py_SIZE(self) < self->ob_alloc)
+        if (Py_SIZE(self) + 1 < self->ob_alloc) {
             Py_SIZE(self)++;
+            PyByteArray_AS_STRING(self)[Py_SIZE(self)] = '\0';
+        }
         else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
             goto error;
-        self->ob_bytes[Py_SIZE(self)-1] = value;
+        PyByteArray_AS_STRING(self)[Py_SIZE(self)-1] = value;
     }
 
     /* Clean up and return success */
@@ -862,9 +884,10 @@ bytearray_repr(PyByteArrayObject *self)
     /* 15 == strlen(quote_prefix) + 2 + strlen(quote_postfix) + 1 */
     size_t newsize;
     PyObject *v;
-    register Py_ssize_t i;
-    register char c;
-    register char *p;
+    Py_ssize_t i;
+    char *bytes;
+    char c;
+    char *p;
     int quote;
     char *test, *start;
     char *buffer;
@@ -899,11 +922,12 @@ bytearray_repr(PyByteArrayObject *self)
         *p++ = *quote_prefix++;
     *p++ = quote;
 
+    bytes = PyByteArray_AS_STRING(self);
     for (i = 0; i < length; i++) {
         /* There's at least enough room for a hex escape
            and a closing quote. */
         assert(newsize - (p - buffer) >= 5);
-        c = self->ob_bytes[i];
+        c = bytes[i];
         if (c == '\'' || c == '\\')
             *p++ = '\\', *p++ = c;
         else if (c == '\t')
@@ -952,13 +976,17 @@ bytearray_richcompare(PyObject *self, PyObject *other, int op)
     Py_buffer self_bytes, other_bytes;
     PyObject *res;
     Py_ssize_t minsize;
-    int cmp;
+    int cmp, rc;
 
     /* Bytes can be compared to anything that supports the (binary)
        buffer API.  Except that a comparison with Unicode is always an
        error, even if the comparison is for equality. */
-    if (PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type) ||
-        PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type)) {
+    rc = PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type);
+    if (!rc)
+        rc = PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type);
+    if (rc < 0)
+        return NULL;
+    if (rc) {
         if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
             if (PyErr_WarnEx(PyExc_BytesWarning,
                             "Comparison between bytearray and string", 1))
@@ -968,18 +996,18 @@ bytearray_richcompare(PyObject *self, PyObject *other, int op)
         Py_RETURN_NOTIMPLEMENTED;
     }
 
-    self_size = _getbuffer(self, &self_bytes);
-    if (self_size < 0) {
+    if (PyObject_GetBuffer(self, &self_bytes, PyBUF_SIMPLE) != 0) {
         PyErr_Clear();
         Py_RETURN_NOTIMPLEMENTED;
     }
+    self_size = self_bytes.len;
 
-    other_size = _getbuffer(other, &other_bytes);
-    if (other_size < 0) {
+    if (PyObject_GetBuffer(other, &other_bytes, PyBUF_SIMPLE) != 0) {
         PyErr_Clear();
         PyBuffer_Release(&self_bytes);
         Py_RETURN_NOTIMPLEMENTED;
     }
+    other_size = other_bytes.len;
 
     if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
         /* Shortcut: if the lengths differ, the objects differ */
@@ -1038,6 +1066,7 @@ bytearray_dealloc(PyByteArrayObject *self)
 #define FASTSEARCH fastsearch
 #define STRINGLIB(F) stringlib_##F
 #define STRINGLIB_CHAR char
+#define STRINGLIB_SIZEOF_CHAR 1
 #define STRINGLIB_LEN PyByteArray_GET_SIZE
 #define STRINGLIB_STR PyByteArray_AS_STRING
 #define STRINGLIB_NEW PyByteArray_FromStringAndSize
@@ -1049,6 +1078,7 @@ bytearray_dealloc(PyByteArrayObject *self)
 #include "stringlib/fastsearch.h"
 #include "stringlib/count.h"
 #include "stringlib/find.h"
+#include "stringlib/join.h"
 #include "stringlib/partition.h"
 #include "stringlib/split.h"
 #include "stringlib/ctype.h"
@@ -1089,7 +1119,7 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
         return -2;
 
     if (subobj) {
-        if (_getbuffer(subobj, &subbuf) < 0)
+        if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
             return -2;
 
         sub = subbuf.buf;
@@ -1157,7 +1187,7 @@ bytearray_count(PyByteArrayObject *self, PyObject *args)
         return NULL;
 
     if (sub_obj) {
-        if (_getbuffer(sub_obj, &vsub) < 0)
+        if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
             return NULL;
 
         sub = vsub.buf;
@@ -1272,7 +1302,7 @@ bytearray_contains(PyObject *self, PyObject *arg)
         Py_buffer varg;
         Py_ssize_t pos;
         PyErr_Clear();
-        if (_getbuffer(arg, &varg) < 0)
+        if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
             return -1;
         pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
                              varg.buf, varg.len, 0);
@@ -1303,7 +1333,7 @@ _bytearray_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start
 
     str = PyByteArray_AS_STRING(self);
 
-    if (_getbuffer(substr, &vsubstr) < 0)
+    if (PyObject_GetBuffer(substr, &vsubstr, PyBUF_SIMPLE) != 0)
         return -1;
 
     ADJUST_INDICES(start, end, len);
@@ -1429,9 +1459,9 @@ table, which must be a bytes object of length 256.");
 static PyObject *
 bytearray_translate(PyByteArrayObject *self, PyObject *args)
 {
-    register char *input, *output;
-    register const char *table;
-    register Py_ssize_t i, c;
+    char *input, *output;
+    const char *table;
+    Py_ssize_t i, c;
     PyObject *input_obj = (PyObject*)self;
     const char *output_start;
     Py_ssize_t inlen;
@@ -1447,7 +1477,7 @@ bytearray_translate(PyByteArrayObject *self, PyObject *args)
     if (tableobj == Py_None) {
         table = NULL;
         tableobj = NULL;
-    } else if (_getbuffer(tableobj, &vtable) < 0) {
+    } else if (PyObject_GetBuffer(tableobj, &vtable, PyBUF_SIMPLE) != 0) {
         return NULL;
     } else {
         if (vtable.len != 256) {
@@ -1460,7 +1490,7 @@ bytearray_translate(PyByteArrayObject *self, PyObject *args)
     }
 
     if (delobj != NULL) {
-        if (_getbuffer(delobj, &vdel) < 0) {
+        if (PyObject_GetBuffer(delobj, &vdel, PyBUF_SIMPLE) != 0) {
             if (tableobj != NULL)
                 PyBuffer_Release(&vtable);
             return NULL;
@@ -2024,26 +2054,20 @@ given, only the first count occurrences are replaced.");
 static PyObject *
 bytearray_replace(PyByteArrayObject *self, PyObject *args)
 {
+    PyObject *res;
+    Py_buffer old = {NULL, NULL};
+    Py_buffer new = {NULL, NULL};
     Py_ssize_t count = -1;
-    PyObject *from, *to, *res;
-    Py_buffer vfrom, vto;
-
-    if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
-        return NULL;
 
-    if (_getbuffer(from, &vfrom) < 0)
+    if (!PyArg_ParseTuple(args, "y*y*|n:replace", &old, &new, &count))
         return NULL;
-    if (_getbuffer(to, &vto) < 0) {
-        PyBuffer_Release(&vfrom);
-        return NULL;
-    }
 
     res = (PyObject *)replace((PyByteArrayObject *) self,
-                              vfrom.buf, vfrom.len,
-                              vto.buf, vto.len, count);
+                              (const char *)old.buf, old.len,
+                              (const char *)new.buf, new.len, count);
 
-    PyBuffer_Release(&vfrom);
-    PyBuffer_Release(&vto);
+    PyBuffer_Release(&old);
+    PyBuffer_Release(&new);
     return res;
 }
 
@@ -2074,7 +2098,7 @@ bytearray_split(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
     if (subobj == Py_None)
         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
 
-    if (_getbuffer(subobj, &vsub) < 0)
+    if (PyObject_GetBuffer(subobj, &vsub, PyBUF_SIMPLE) != 0)
         return NULL;
     sub = vsub.buf;
     n = vsub.len;
@@ -2169,7 +2193,7 @@ bytearray_rsplit(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
     if (subobj == Py_None)
         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
 
-    if (_getbuffer(subobj, &vsub) < 0)
+    if (PyObject_GetBuffer(subobj, &vsub, PyBUF_SIMPLE) != 0)
         return NULL;
     sub = vsub.buf;
     n = vsub.len;
@@ -2192,7 +2216,7 @@ bytearray_reverse(PyByteArrayObject *self, PyObject *unused)
     Py_ssize_t i, j, n = Py_SIZE(self);
 
     j = n / 2;
-    head = self->ob_bytes;
+    head = PyByteArray_AS_STRING(self);
     tail = head + n - 1;
     for (i = 0; i < j; i++) {
         swap = *head;
@@ -2213,6 +2237,7 @@ bytearray_insert(PyByteArrayObject *self, PyObject *args)
     PyObject *value;
     int ival;
     Py_ssize_t where, n = Py_SIZE(self);
+    char *buf;
 
     if (!PyArg_ParseTuple(args, "nO:insert", &where, &value))
         return NULL;
@@ -2226,6 +2251,7 @@ bytearray_insert(PyByteArrayObject *self, PyObject *args)
         return NULL;
     if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
         return NULL;
+    buf = PyByteArray_AS_STRING(self);
 
     if (where < 0) {
         where += n;
@@ -2234,8 +2260,8 @@ bytearray_insert(PyByteArrayObject *self, PyObject *args)
     }
     if (where > n)
         where = n;
-    memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
-    self->ob_bytes[where] = ival;
+    memmove(buf + where + 1, buf + where, n - where);
+    buf[where] = ival;
 
     Py_RETURN_NONE;
 }
@@ -2260,7 +2286,7 @@ bytearray_append(PyByteArrayObject *self, PyObject *arg)
     if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
         return NULL;
 
-    self->ob_bytes[n] = value;
+    PyByteArray_AS_STRING(self)[n] = value;
 
     Py_RETURN_NONE;
 }
@@ -2291,7 +2317,7 @@ bytearray_extend(PyByteArrayObject *self, PyObject *arg)
         return NULL;
 
     /* Try to determine the length of the argument. 32 is arbitrary. */
-    buf_size = _PyObject_LengthHint(arg, 32);
+    buf_size = PyObject_LengthHint(arg, 32);
     if (buf_size == -1) {
         Py_DECREF(it);
         return NULL;
@@ -2353,6 +2379,7 @@ bytearray_pop(PyByteArrayObject *self, PyObject *args)
 {
     int value;
     Py_ssize_t where = -1, n = Py_SIZE(self);
+    char *buf;
 
     if (!PyArg_ParseTuple(args, "|n:pop", &where))
         return NULL;
@@ -2371,8 +2398,9 @@ bytearray_pop(PyByteArrayObject *self, PyObject *args)
     if (!_canresize(self))
         return NULL;
 
-    value = self->ob_bytes[where];
-    memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
+    buf = PyByteArray_AS_STRING(self);
+    value = buf[where];
+    memmove(buf + where, buf + where + 1, n - where);
     if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
         return NULL;
 
@@ -2388,12 +2416,13 @@ bytearray_remove(PyByteArrayObject *self, PyObject *arg)
 {
     int value;
     Py_ssize_t where, n = Py_SIZE(self);
+    char *buf = PyByteArray_AS_STRING(self);
 
     if (! _getbytevalue(arg, &value))
         return NULL;
 
     for (where = 0; where < n; where++) {
-        if (self->ob_bytes[where] == value)
+        if ((unsigned char)buf[where] == value)  /* char may be signed */
             break;
     }
     if (where == n) {
@@ -2403,7 +2432,7 @@ bytearray_remove(PyByteArrayObject *self, PyObject *arg)
     if (!_canresize(self))
         return NULL;
 
-    memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
+    memmove(buf + where, buf + where + 1, n - where);
     if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
         return NULL;
 
@@ -2413,21 +2442,21 @@ bytearray_remove(PyByteArrayObject *self, PyObject *arg)
 /* XXX These two helpers could be optimized if argsize == 1 */
 
 static Py_ssize_t
-lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
+lstrip_helper(char *myptr, Py_ssize_t mysize,
               void *argptr, Py_ssize_t argsize)
 {
     Py_ssize_t i = 0;
-    while (i < mysize && memchr(argptr, myptr[i], argsize))
+    while (i < mysize && memchr(argptr, (unsigned char) myptr[i], argsize))
         i++;
     return i;
 }
 
 static Py_ssize_t
-rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
+rstrip_helper(char *myptr, Py_ssize_t mysize,
               void *argptr, Py_ssize_t argsize)
 {
     Py_ssize_t i = mysize - 1;
-    while (i >= 0 && memchr(argptr, myptr[i], argsize))
+    while (i >= 0 && memchr(argptr, (unsigned char) myptr[i], argsize))
         i--;
     return i + 1;
 }
@@ -2442,7 +2471,7 @@ static PyObject *
 bytearray_strip(PyByteArrayObject *self, PyObject *args)
 {
     Py_ssize_t left, right, mysize, argsize;
-    void *myptr, *argptr;
+    char *myptr, *argptr;
     PyObject *arg = Py_None;
     Py_buffer varg;
     if (!PyArg_ParseTuple(args, "|O:strip", &arg))
@@ -2452,12 +2481,12 @@ bytearray_strip(PyByteArrayObject *self, PyObject *args)
         argsize = 6;
     }
     else {
-        if (_getbuffer(arg, &varg) < 0)
+        if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
             return NULL;
-        argptr = varg.buf;
+        argptr = (char *) varg.buf;
         argsize = varg.len;
     }
-    myptr = self->ob_bytes;
+    myptr = PyByteArray_AS_STRING(self);
     mysize = Py_SIZE(self);
     left = lstrip_helper(myptr, mysize, argptr, argsize);
     if (left == mysize)
@@ -2466,7 +2495,7 @@ bytearray_strip(PyByteArrayObject *self, PyObject *args)
         right = rstrip_helper(myptr, mysize, argptr, argsize);
     if (arg != Py_None)
         PyBuffer_Release(&varg);
-    return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
+    return PyByteArray_FromStringAndSize(myptr + left, right - left);
 }
 
 PyDoc_STRVAR(lstrip__doc__,
@@ -2479,7 +2508,7 @@ static PyObject *
 bytearray_lstrip(PyByteArrayObject *self, PyObject *args)
 {
     Py_ssize_t left, right, mysize, argsize;
-    void *myptr, *argptr;
+    char *myptr, *argptr;
     PyObject *arg = Py_None;
     Py_buffer varg;
     if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
@@ -2489,18 +2518,18 @@ bytearray_lstrip(PyByteArrayObject *self, PyObject *args)
         argsize = 6;
     }
     else {
-        if (_getbuffer(arg, &varg) < 0)
+        if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
             return NULL;
-        argptr = varg.buf;
+        argptr = (char *) varg.buf;
         argsize = varg.len;
     }
-    myptr = self->ob_bytes;
+    myptr = PyByteArray_AS_STRING(self);
     mysize = Py_SIZE(self);
     left = lstrip_helper(myptr, mysize, argptr, argsize);
     right = mysize;
     if (arg != Py_None)
         PyBuffer_Release(&varg);
-    return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
+    return PyByteArray_FromStringAndSize(myptr + left, right - left);
 }
 
 PyDoc_STRVAR(rstrip__doc__,
@@ -2513,7 +2542,7 @@ static PyObject *
 bytearray_rstrip(PyByteArrayObject *self, PyObject *args)
 {
     Py_ssize_t right, mysize, argsize;
-    void *myptr, *argptr;
+    char *myptr, *argptr;
     PyObject *arg = Py_None;
     Py_buffer varg;
     if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
@@ -2523,17 +2552,17 @@ bytearray_rstrip(PyByteArrayObject *self, PyObject *args)
         argsize = 6;
     }
     else {
-        if (_getbuffer(arg, &varg) < 0)
+        if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
             return NULL;
-        argptr = varg.buf;
+        argptr = (char *) varg.buf;
         argsize = varg.len;
     }
-    myptr = self->ob_bytes;
+    myptr = PyByteArray_AS_STRING(self);
     mysize = Py_SIZE(self);
     right = rstrip_helper(myptr, mysize, argptr, argsize);
     if (arg != Py_None)
         PyBuffer_Release(&varg);
-    return PyByteArray_FromStringAndSize(self->ob_bytes, right);
+    return PyByteArray_FromStringAndSize(myptr, right);
 }
 
 PyDoc_STRVAR(decode_doc,
@@ -2578,73 +2607,9 @@ Concatenate any number of bytes/bytearray objects, with B\n\
 in between each pair, and return the result as a new bytearray.");
 
 static PyObject *
-bytearray_join(PyByteArrayObject *self, PyObject *it)
+bytearray_join(PyObject *self, PyObject *iterable)
 {
-    PyObject *seq;
-    Py_ssize_t mysize = Py_SIZE(self);
-    Py_ssize_t i;
-    Py_ssize_t n;
-    PyObject **items;
-    Py_ssize_t totalsize = 0;
-    PyObject *result;
-    char *dest;
-
-    seq = PySequence_Fast(it, "can only join an iterable");
-    if (seq == NULL)
-        return NULL;
-    n = PySequence_Fast_GET_SIZE(seq);
-    items = PySequence_Fast_ITEMS(seq);
-
-    /* Compute the total size, and check that they are all bytes */
-    /* XXX Shouldn't we use _getbuffer() on these items instead? */
-    for (i = 0; i < n; i++) {
-        PyObject *obj = items[i];
-        if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
-            PyErr_Format(PyExc_TypeError,
-                         "can only join an iterable of bytes "
-                         "(item %ld has type '%.100s')",
-                         /* XXX %ld isn't right on Win64 */
-                         (long)i, Py_TYPE(obj)->tp_name);
-            goto error;
-        }
-        if (i > 0)
-            totalsize += mysize;
-        totalsize += Py_SIZE(obj);
-        if (totalsize < 0) {
-            PyErr_NoMemory();
-            goto error;
-        }
-    }
-
-    /* Allocate the result, and copy the bytes */
-    result = PyByteArray_FromStringAndSize(NULL, totalsize);
-    if (result == NULL)
-        goto error;
-    dest = PyByteArray_AS_STRING(result);
-    for (i = 0; i < n; i++) {
-        PyObject *obj = items[i];
-        Py_ssize_t size = Py_SIZE(obj);
-        char *buf;
-        if (PyByteArray_Check(obj))
-           buf = PyByteArray_AS_STRING(obj);
-        else
-           buf = PyBytes_AS_STRING(obj);
-        if (i) {
-            memcpy(dest, self->ob_bytes, mysize);
-            dest += mysize;
-        }
-        memcpy(dest, buf, size);
-        dest += size;
-    }
-
-    /* Done */
-    Py_DECREF(seq);
-    return result;
-
-    /* Error handling */
-  error:
-    Py_DECREF(seq);
-    return NULL;
+    return stringlib_bytes_join(self, iterable);
 }
 
 PyDoc_STRVAR(splitlines__doc__,
@@ -2748,6 +2713,7 @@ _common_reduce(PyByteArrayObject *self, int proto)
 {
     PyObject *dict;
     _Py_IDENTIFIER(__dict__);
+    char *buf;
 
     dict = _PyObject_GetAttrId((PyObject *)self, &PyId___dict__);
     if (dict == NULL) {
@@ -2756,19 +2722,20 @@ _common_reduce(PyByteArrayObject *self, int proto)
         Py_INCREF(dict);
     }
 
+    buf = PyByteArray_AS_STRING(self);
     if (proto < 3) {
         /* use str based reduction for backwards compatibility with Python 2.x */
         PyObject *latin1;
-        if (self->ob_bytes)
-            latin1 = PyUnicode_DecodeLatin1(self->ob_bytes, Py_SIZE(self), NULL);
+        if (Py_SIZE(self))
+            latin1 = PyUnicode_DecodeLatin1(buf, Py_SIZE(self), NULL);
         else
             latin1 = PyUnicode_FromString("");
         return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
     }
     else {
         /* use more efficient byte based reduction */
-        if (self->ob_bytes) {
-            return Py_BuildValue("(O(y#)N)", Py_TYPE(self), self->ob_bytes, Py_SIZE(self), dict);
+        if (Py_SIZE(self)) {
+            return Py_BuildValue("(O(y#)N)", Py_TYPE(self), buf, Py_SIZE(self), dict);
         }
         else {
             return Py_BuildValue("(O()N)", Py_TYPE(self), dict);
@@ -2849,7 +2816,7 @@ bytearray_methods[] = {
     {"count", (PyCFunction)bytearray_count, METH_VARARGS, count__doc__},
     {"decode", (PyCFunction)bytearray_decode, METH_VARARGS | METH_KEYWORDS, decode_doc},
     {"endswith", (PyCFunction)bytearray_endswith, METH_VARARGS, endswith__doc__},
-    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
+    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
      expandtabs__doc__},
     {"extend", (PyCFunction)bytearray_extend, METH_O, extend__doc__},
     {"find", (PyCFunction)bytearray_find, METH_VARARGS, find__doc__},
@@ -3000,7 +2967,7 @@ bytearrayiter_next(bytesiterobject *it)
 
     if (it->it_index < PyByteArray_GET_SIZE(seq)) {
         item = PyLong_FromLong(
-            (unsigned char)seq->ob_bytes[it->it_index]);
+            (unsigned char)PyByteArray_AS_STRING(seq)[it->it_index]);
         if (item != NULL)
             ++it->it_index;
         return item;
diff --git a/Objects/bytes_methods.c b/Objects/bytes_methods.c
index ef3c2f7..9ff7ace 100644
--- a/Objects/bytes_methods.c
+++ b/Objects/bytes_methods.c
@@ -10,9 +10,9 @@ and there is at least one character in B, False otherwise.");
 PyObject*
 _Py_bytes_isspace(const char *cptr, Py_ssize_t len)
 {
-    register const unsigned char *p
+    const unsigned char *p
         = (unsigned char *) cptr;
-    register const unsigned char *e;
+    const unsigned char *e;
 
     /* Shortcut for single character strings */
     if (len == 1 && Py_ISSPACE(*p))
@@ -40,9 +40,9 @@ and there is at least one character in B, False otherwise.");
 PyObject*
 _Py_bytes_isalpha(const char *cptr, Py_ssize_t len)
 {
-    register const unsigned char *p
+    const unsigned char *p
         = (unsigned char *) cptr;
-    register const unsigned char *e;
+    const unsigned char *e;
 
     /* Shortcut for single character strings */
     if (len == 1 && Py_ISALPHA(*p))
@@ -70,9 +70,9 @@ and there is at least one character in B, False otherwise.");
 PyObject*
 _Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
 {
-    register const unsigned char *p
+    const unsigned char *p
         = (unsigned char *) cptr;
-    register const unsigned char *e;
+    const unsigned char *e;
 
     /* Shortcut for single character strings */
     if (len == 1 && Py_ISALNUM(*p))
@@ -100,9 +100,9 @@ and there is at least one character in B, False otherwise.");
 PyObject*
 _Py_bytes_isdigit(const char *cptr, Py_ssize_t len)
 {
-    register const unsigned char *p
+    const unsigned char *p
         = (unsigned char *) cptr;
-    register const unsigned char *e;
+    const unsigned char *e;
 
     /* Shortcut for single character strings */
     if (len == 1 && Py_ISDIGIT(*p))
@@ -130,9 +130,9 @@ at least one cased character in B, False otherwise.");
 PyObject*
 _Py_bytes_islower(const char *cptr, Py_ssize_t len)
 {
-    register const unsigned char *p
+    const unsigned char *p
         = (unsigned char *) cptr;
-    register const unsigned char *e;
+    const unsigned char *e;
     int cased;
 
     /* Shortcut for single character strings */
@@ -164,9 +164,9 @@ at least one cased character in B, False otherwise.");
 PyObject*
 _Py_bytes_isupper(const char *cptr, Py_ssize_t len)
 {
-    register const unsigned char *p
+    const unsigned char *p
         = (unsigned char *) cptr;
-    register const unsigned char *e;
+    const unsigned char *e;
     int cased;
 
     /* Shortcut for single character strings */
@@ -200,9 +200,9 @@ otherwise.");
 PyObject*
 _Py_bytes_istitle(const char *cptr, Py_ssize_t len)
 {
-    register const unsigned char *p
+    const unsigned char *p
         = (unsigned char *) cptr;
-    register const unsigned char *e;
+    const unsigned char *e;
     int cased, previous_is_cased;
 
     /* Shortcut for single character strings */
@@ -217,7 +217,7 @@ _Py_bytes_istitle(const char *cptr, Py_ssize_t len)
     cased = 0;
     previous_is_cased = 0;
     for (; p < e; p++) {
-        register const unsigned char ch = *p;
+        const unsigned char ch = *p;
 
         if (Py_ISUPPER(ch)) {
             if (previous_is_cased)
@@ -363,41 +363,20 @@ for use in the bytes or bytearray translate method where each byte\n\
 in frm is mapped to the byte at the same position in to.\n\
 The bytes objects frm and to must be of the same length.");
 
-static Py_ssize_t
-_getbuffer(PyObject *obj, Py_buffer *view)
-{
-    PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
-
-    if (buffer == NULL || buffer->bf_getbuffer == NULL)
-    {
-        PyErr_Format(PyExc_TypeError,
-                     "Type %.100s doesn't support the buffer API",
-                     Py_TYPE(obj)->tp_name);
-        return -1;
-    }
-
-    if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
-        return -1;
-    return view->len;
-}
-
 PyObject *
 _Py_bytes_maketrans(PyObject *args)
 {
-    PyObject *frm, *to, *res = NULL;
-    Py_buffer bfrm, bto;
+    PyObject *res = NULL;
+    Py_buffer bfrm = {NULL, NULL};
+    Py_buffer bto = {NULL, NULL};
     Py_ssize_t i;
     char *p;
 
     bfrm.len = -1;
     bto.len = -1;
 
-    if (!PyArg_ParseTuple(args, "OO:maketrans", &frm, &to))
-        return NULL;
-    if (_getbuffer(frm, &bfrm) < 0)
+    if (!PyArg_ParseTuple(args, "y*y*:maketrans", &bfrm, &bto))
         return NULL;
-    if (_getbuffer(to, &bto) < 0)
-        goto done;
     if (bfrm.len != bto.len) {
         PyErr_Format(PyExc_ValueError,
                      "maketrans arguments must have same length");
@@ -415,9 +394,9 @@ _Py_bytes_maketrans(PyObject *args)
     }
 
 done:
-    if (bfrm.len != -1)
+    if (bfrm.obj != NULL)
         PyBuffer_Release(&bfrm);
-    if (bto.len != -1)
+    if (bto.obj != NULL)
         PyBuffer_Release(&bto);
     return res;
 }
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 32c5d71..27f4069 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -7,24 +7,6 @@
 #include "bytes_methods.h"
 #include <stddef.h>
 
-static Py_ssize_t
-_getbuffer(PyObject *obj, Py_buffer *view)
-{
-    PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
-
-    if (buffer == NULL || buffer->bf_getbuffer == NULL)
-    {
-        PyErr_Format(PyExc_TypeError,
-                     "Type %.100s doesn't support the buffer API",
-                     Py_TYPE(obj)->tp_name);
-        return -1;
-    }
-
-    if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
-        return -1;
-    return view->len;
-}
-
 #ifdef COUNT_ALLOCS
 Py_ssize_t null_strings, one_strings;
 #endif
@@ -65,7 +47,7 @@ static PyBytesObject *nullstring;
 PyObject *
 PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
 {
-    register PyBytesObject *op;
+    PyBytesObject *op;
     if (size < 0) {
         PyErr_SetString(PyExc_SystemError,
             "Negative size passed to PyBytes_FromStringAndSize");
@@ -98,7 +80,7 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
     op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
     if (op == NULL)
         return PyErr_NoMemory();
-    PyObject_INIT_VAR(op, &PyBytes_Type, size);
+    (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
     op->ob_shash = -1;
     if (str != NULL)
         Py_MEMCPY(op->ob_sval, str, size);
@@ -117,8 +99,8 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
 PyObject *
 PyBytes_FromString(const char *str)
 {
-    register size_t size;
-    register PyBytesObject *op;
+    size_t size;
+    PyBytesObject *op;
 
     assert(str != NULL);
     size = strlen(str);
@@ -146,7 +128,7 @@ PyBytes_FromString(const char *str)
     op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
     if (op == NULL)
         return PyErr_NoMemory();
-    PyObject_INIT_VAR(op, &PyBytes_Type, size);
+    (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
     op->ob_shash = -1;
     Py_MEMCPY(op->ob_sval, str, size+1);
     /* share short strings */
@@ -517,7 +499,7 @@ PyObject *PyBytes_DecodeEscape(const char *s,
 /* object api */
 
 Py_ssize_t
-PyBytes_Size(register PyObject *op)
+PyBytes_Size(PyObject *op)
 {
     if (!PyBytes_Check(op)) {
         PyErr_Format(PyExc_TypeError,
@@ -528,7 +510,7 @@ PyBytes_Size(register PyObject *op)
 }
 
 char *
-PyBytes_AsString(register PyObject *op)
+PyBytes_AsString(PyObject *op)
 {
     if (!PyBytes_Check(op)) {
         PyErr_Format(PyExc_TypeError,
@@ -539,9 +521,9 @@ PyBytes_AsString(register PyObject *op)
 }
 
 int
-PyBytes_AsStringAndSize(register PyObject *obj,
-                         register char **s,
-                         register Py_ssize_t *len)
+PyBytes_AsStringAndSize(PyObject *obj,
+                         char **s,
+                         Py_ssize_t *len)
 {
     if (s == NULL) {
         PyErr_BadInternalCall();
@@ -573,6 +555,7 @@ PyBytes_AsStringAndSize(register PyObject *obj,
 #include "stringlib/fastsearch.h"
 #include "stringlib/count.h"
 #include "stringlib/find.h"
+#include "stringlib/join.h"
 #include "stringlib/partition.h"
 #include "stringlib/split.h"
 #include "stringlib/ctype.h"
@@ -582,7 +565,7 @@ PyBytes_AsStringAndSize(register PyObject *obj,
 PyObject *
 PyBytes_Repr(PyObject *obj, int smartquotes)
 {
-    register PyBytesObject* op = (PyBytesObject*) obj;
+    PyBytesObject* op = (PyBytesObject*) obj;
     Py_ssize_t i, length = Py_SIZE(op);
     Py_ssize_t newsize, squotes, dquotes;
     PyObject *v;
@@ -685,8 +668,8 @@ bytes_concat(PyObject *a, PyObject *b)
 
     va.len = -1;
     vb.len = -1;
-    if (_getbuffer(a, &va) < 0  ||
-        _getbuffer(b, &vb) < 0) {
+    if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
+        PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
         PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
                      Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
         goto done;
@@ -725,12 +708,12 @@ bytes_concat(PyObject *a, PyObject *b)
 }
 
 static PyObject *
-bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
+bytes_repeat(PyBytesObject *a, Py_ssize_t n)
 {
-    register Py_ssize_t i;
-    register Py_ssize_t j;
-    register Py_ssize_t size;
-    register PyBytesObject *op;
+    Py_ssize_t i;
+    Py_ssize_t j;
+    Py_ssize_t size;
+    PyBytesObject *op;
     size_t nbytes;
     if (n < 0)
         n = 0;
@@ -756,7 +739,7 @@ bytes_repeat(register PyBytesObject *a, register Py_ssize_t n)
     op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
     if (op == NULL)
         return PyErr_NoMemory();
-    PyObject_INIT_VAR(op, &PyBytes_Type, size);
+    (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
     op->ob_shash = -1;
     op->ob_sval[size] = '\0';
     if (Py_SIZE(a) == 1 && n > 0) {
@@ -784,7 +767,7 @@ bytes_contains(PyObject *self, PyObject *arg)
         Py_buffer varg;
         Py_ssize_t pos;
         PyErr_Clear();
-        if (_getbuffer(arg, &varg) < 0)
+        if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
             return -1;
         pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self),
                              varg.buf, varg.len, 0);
@@ -800,7 +783,7 @@ bytes_contains(PyObject *self, PyObject *arg)
 }
 
 static PyObject *
-bytes_item(PyBytesObject *a, register Py_ssize_t i)
+bytes_item(PyBytesObject *a, Py_ssize_t i)
 {
     if (i < 0 || i >= Py_SIZE(a)) {
         PyErr_SetString(PyExc_IndexError, "index out of range");
@@ -809,6 +792,23 @@ bytes_item(PyBytesObject *a, register Py_ssize_t i)
     return PyLong_FromLong((unsigned char)a->ob_sval[i]);
 }
 
+Py_LOCAL(int)
+bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
+{
+    /* Equality test for two bytes objects: 1 if same contents, else 0. */
+    const Py_ssize_t size = Py_SIZE(a);
+
+    /* Different lengths can never compare equal. */
+    if (size != Py_SIZE(b)) {
+        return 0;
+    }
+    /* Cheap first-byte rejection before the full memcmp(). */
+    if (a->ob_sval[0] != b->ob_sval[0]) {
+        return 0;
+    }
+    return memcmp(a->ob_sval, b->ob_sval, size) == 0;
+}
+
 static PyObject*
 bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
 {
@@ -816,66 +816,74 @@ bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
     Py_ssize_t len_a, len_b;
     Py_ssize_t min_len;
     PyObject *result;
+    int rc;
 
     /* Make sure both arguments are strings. */
     if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
-        if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE) &&
-            (PyObject_IsInstance((PyObject*)a,
-                                 (PyObject*)&PyUnicode_Type) ||
-            PyObject_IsInstance((PyObject*)b,
-                                 (PyObject*)&PyUnicode_Type))) {
-            if (PyErr_WarnEx(PyExc_BytesWarning,
-                        "Comparison between bytes and string", 1))
+        if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
+            rc = PyObject_IsInstance((PyObject*)a,
+                                     (PyObject*)&PyUnicode_Type);
+            if (!rc)
+                rc = PyObject_IsInstance((PyObject*)b,
+                                         (PyObject*)&PyUnicode_Type);
+            if (rc < 0)
                 return NULL;
+            if (rc) {
+                if (PyErr_WarnEx(PyExc_BytesWarning,
+                                 "Comparison between bytes and string", 1))
+                    return NULL;
+            }
         }
         result = Py_NotImplemented;
-        goto out;
     }
-    if (a == b) {
+    else if (a == b) {
         switch (op) {
-        case Py_EQ:case Py_LE:case Py_GE:
+        case Py_EQ:
+        case Py_LE:
+        case Py_GE:
+            /* a string is equal to itself */
             result = Py_True;
-            goto out;
-        case Py_NE:case Py_LT:case Py_GT:
+            break;
+        case Py_NE:
+        case Py_LT:
+        case Py_GT:
             result = Py_False;
-            goto out;
+            break;
+        default:
+            PyErr_BadArgument();
+            return NULL;
         }
     }
-    if (op == Py_EQ) {
-        /* Supporting Py_NE here as well does not save
-           much time, since Py_NE is rarely used.  */
-        if (Py_SIZE(a) == Py_SIZE(b)
-            && (a->ob_sval[0] == b->ob_sval[0]
-            && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
-            result = Py_True;
-        } else {
-            result = Py_False;
+    else if (op == Py_EQ || op == Py_NE) {
+        int eq = bytes_compare_eq(a, b);
+        eq ^= (op == Py_NE);
+        result = eq ? Py_True : Py_False;
+    }
+    else {
+        len_a = Py_SIZE(a);
+        len_b = Py_SIZE(b);
+        min_len = Py_MIN(len_a, len_b);
+        if (min_len > 0) {
+            c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
+            if (c == 0)
+                c = memcmp(a->ob_sval, b->ob_sval, min_len);
         }
-        goto out;
-    }
-    len_a = Py_SIZE(a); len_b = Py_SIZE(b);
-    min_len = (len_a < len_b) ? len_a : len_b;
-    if (min_len > 0) {
-        c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
-        if (c==0)
-            c = memcmp(a->ob_sval, b->ob_sval, min_len);
-    } else
-        c = 0;
-    if (c == 0)
-        c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
-    switch (op) {
-    case Py_LT: c = c <  0; break;
-    case Py_LE: c = c <= 0; break;
-    case Py_EQ: assert(0);  break; /* unreachable */
-    case Py_NE: c = c != 0; break;
-    case Py_GT: c = c >  0; break;
-    case Py_GE: c = c >= 0; break;
-    default:
-        result = Py_NotImplemented;
-        goto out;
+        else
+            c = 0;
+        if (c == 0)
+            c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
+        switch (op) {
+        case Py_LT: c = c <  0; break;
+        case Py_LE: c = c <= 0; break;
+        case Py_GT: c = c >  0; break;
+        case Py_GE: c = c >= 0; break;
+        default:
+            PyErr_BadArgument();
+            return NULL;
+        }
+        result = c ? Py_True : Py_False;
     }
-    result = c ? Py_True : Py_False;
-  out:
+
     Py_INCREF(result);
     return result;
 }
@@ -885,7 +893,7 @@ bytes_hash(PyBytesObject *a)
 {
     if (a->ob_shash == -1) {
         /* Can't fail */
-        a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a));
+        a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
     }
     return a->ob_shash;
 }
@@ -1019,7 +1027,7 @@ bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
         maxsplit = PY_SSIZE_T_MAX;
     if (subobj == Py_None)
         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
-    if (_getbuffer(subobj, &vsub) < 0)
+    if (PyObject_GetBuffer(subobj, &vsub, PyBUF_SIMPLE) != 0)
         return NULL;
     sub = vsub.buf;
     n = vsub.len;
@@ -1039,21 +1047,19 @@ found, returns B and two empty bytes objects.");
 static PyObject *
 bytes_partition(PyBytesObject *self, PyObject *sep_obj)
 {
-    const char *sep;
-    Py_ssize_t sep_len;
+    Py_buffer sep = {NULL, NULL};
+    PyObject *res;
 
-    if (PyBytes_Check(sep_obj)) {
-        sep = PyBytes_AS_STRING(sep_obj);
-        sep_len = PyBytes_GET_SIZE(sep_obj);
-    }
-    else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
+    if (PyObject_GetBuffer(sep_obj, &sep, PyBUF_SIMPLE) != 0)
         return NULL;
 
-    return stringlib_partition(
+    res = stringlib_partition(
         (PyObject*) self,
         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
-        sep_obj, sep, sep_len
+        sep_obj, sep.buf, sep.len
         );
+    PyBuffer_Release(&sep);
+    return res;
 }
 
 PyDoc_STRVAR(rpartition__doc__,
@@ -1067,21 +1073,19 @@ bytes objects and B.");
 static PyObject *
 bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
 {
-    const char *sep;
-    Py_ssize_t sep_len;
+    Py_buffer sep = {NULL, NULL};
+    PyObject *res;
 
-    if (PyBytes_Check(sep_obj)) {
-        sep = PyBytes_AS_STRING(sep_obj);
-        sep_len = PyBytes_GET_SIZE(sep_obj);
-    }
-    else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
+    if (PyObject_GetBuffer(sep_obj, &sep, PyBUF_SIMPLE) != 0)
         return NULL;
 
-    return stringlib_rpartition(
+    res = stringlib_rpartition(
         (PyObject*) self,
         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
-        sep_obj, sep, sep_len
+        sep_obj, sep.buf, sep.len
         );
+    PyBuffer_Release(&sep);
+    return res;
 }
 
 PyDoc_STRVAR(rsplit__doc__,
@@ -1111,7 +1115,7 @@ bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
         maxsplit = PY_SSIZE_T_MAX;
     if (subobj == Py_None)
         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
-    if (_getbuffer(subobj, &vsub) < 0)
+    if (PyObject_GetBuffer(subobj, &vsub, PyBUF_SIMPLE) != 0)
         return NULL;
     sub = vsub.buf;
     n = vsub.len;
@@ -1129,94 +1133,9 @@ Concatenate any number of bytes objects, with B in between each pair.\n\
 Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.");
 
 static PyObject *
-bytes_join(PyObject *self, PyObject *orig)
-{
-    char *sep = PyBytes_AS_STRING(self);
-    const Py_ssize_t seplen = PyBytes_GET_SIZE(self);
-    PyObject *res = NULL;
-    char *p;
-    Py_ssize_t seqlen = 0;
-    size_t sz = 0;
-    Py_ssize_t i;
-    PyObject *seq, *item;
-
-    seq = PySequence_Fast(orig, "");
-    if (seq == NULL) {
-        return NULL;
-    }
-
-    seqlen = PySequence_Size(seq);
-    if (seqlen == 0) {
-        Py_DECREF(seq);
-        return PyBytes_FromString("");
-    }
-    if (seqlen == 1) {
-        item = PySequence_Fast_GET_ITEM(seq, 0);
-        if (PyBytes_CheckExact(item)) {
-            Py_INCREF(item);
-            Py_DECREF(seq);
-            return item;
-        }
-    }
-
-    /* There are at least two things to join, or else we have a subclass
-     * of the builtin types in the sequence.
-     * Do a pre-pass to figure out the total amount of space we'll
-     * need (sz), and see whether all argument are bytes.
-     */
-    /* XXX Shouldn't we use _getbuffer() on these items instead? */
-    for (i = 0; i < seqlen; i++) {
-        const size_t old_sz = sz;
-        item = PySequence_Fast_GET_ITEM(seq, i);
-        if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
-            PyErr_Format(PyExc_TypeError,
-                         "sequence item %zd: expected bytes,"
-                         " %.80s found",
-                         i, Py_TYPE(item)->tp_name);
-            Py_DECREF(seq);
-            return NULL;
-        }
-        sz += Py_SIZE(item);
-        if (i != 0)
-            sz += seplen;
-        if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
-            PyErr_SetString(PyExc_OverflowError,
-                "join() result is too long for bytes");
-            Py_DECREF(seq);
-            return NULL;
-        }
-    }
-
-    /* Allocate result space. */
-    res = PyBytes_FromStringAndSize((char*)NULL, sz);
-    if (res == NULL) {
-        Py_DECREF(seq);
-        return NULL;
-    }
-
-    /* Catenate everything. */
-    /* I'm not worried about a PyByteArray item growing because there's
-       nowhere in this function where we release the GIL. */
-    p = PyBytes_AS_STRING(res);
-    for (i = 0; i < seqlen; ++i) {
-        size_t n;
-        char *q;
-        if (i) {
-            Py_MEMCPY(p, sep, seplen);
-            p += seplen;
-        }
-        item = PySequence_Fast_GET_ITEM(seq, i);
-        n = Py_SIZE(item);
-        if (PyBytes_Check(item))
-            q = PyBytes_AS_STRING(item);
-        else
-            q = PyByteArray_AS_STRING(item);
-        Py_MEMCPY(p, q, n);
-        p += n;
-    }
-
-    Py_DECREF(seq);
-    return res;
+bytes_join(PyObject *self, PyObject *iterable)
+{
+    return stringlib_bytes_join(self, iterable);
 }
 
 PyObject *
@@ -1258,7 +1177,7 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
         return -2;
 
     if (subobj) {
-        if (_getbuffer(subobj, &subbuf) < 0)
+        if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
             return -2;
 
         sub = subbuf.buf;
@@ -1373,7 +1292,7 @@ do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
     Py_ssize_t seplen;
     Py_ssize_t i, j;
 
-    if (_getbuffer(sepobj, &vsep) < 0)
+    if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
         return NULL;
     sep = vsep.buf;
     seplen = vsep.len;
@@ -1439,7 +1358,7 @@ do_argstrip(PyBytesObject *self, int striptype, PyObject *args)
 {
     PyObject *sep = NULL;
 
-    if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
+    if (!PyArg_ParseTuple(args, stripformat[striptype], &sep))
         return NULL;
 
     if (sep != NULL && sep != Py_None) {
@@ -1518,7 +1437,7 @@ bytes_count(PyBytesObject *self, PyObject *args)
         return NULL;
 
     if (sub_obj) {
-        if (_getbuffer(sub_obj, &vsub) < 0)
+        if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
             return NULL;
 
         sub = vsub.buf;
@@ -1553,9 +1472,11 @@ table, which must be a bytes object of length 256.");
 static PyObject *
 bytes_translate(PyBytesObject *self, PyObject *args)
 {
-    register char *input, *output;
+    char *input, *output;
+    Py_buffer table_view = {NULL, NULL};
+    Py_buffer del_table_view = {NULL, NULL};
     const char *table;
-    register Py_ssize_t i, c, changed = 0;
+    Py_ssize_t i, c, changed = 0;
     PyObject *input_obj = (PyObject*)self;
     const char *output_start, *del_table=NULL;
     Py_ssize_t inlen, tablen, dellen = 0;
@@ -1575,12 +1496,17 @@ bytes_translate(PyBytesObject *self, PyObject *args)
         table = NULL;
         tablen = 256;
     }
-    else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
-        return NULL;
+    else {
+        if (PyObject_GetBuffer(tableobj, &table_view, PyBUF_SIMPLE) != 0)
+            return NULL;
+        table = table_view.buf;
+        tablen = table_view.len;
+    }
 
     if (tablen != 256) {
         PyErr_SetString(PyExc_ValueError,
           "translation table must be 256 characters long");
+        PyBuffer_Release(&table_view);
         return NULL;
     }
 
@@ -1589,8 +1515,14 @@ bytes_translate(PyBytesObject *self, PyObject *args)
             del_table = PyBytes_AS_STRING(delobj);
             dellen = PyBytes_GET_SIZE(delobj);
         }
-        else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
-            return NULL;
+        else {
+            if (PyObject_GetBuffer(delobj, &del_table_view, PyBUF_SIMPLE) != 0) {
+                PyBuffer_Release(&table_view);
+                return NULL;
+            }
+            del_table = del_table_view.buf;
+            dellen = del_table_view.len;
+        }
     }
     else {
         del_table = NULL;
@@ -1599,8 +1531,11 @@ bytes_translate(PyBytesObject *self, PyObject *args)
 
     inlen = PyBytes_GET_SIZE(input_obj);
     result = PyBytes_FromStringAndSize((char *)NULL, inlen);
-    if (result == NULL)
+    if (result == NULL) {
+        PyBuffer_Release(&del_table_view);
+        PyBuffer_Release(&table_view);
         return NULL;
+    }
     output_start = output = PyBytes_AsString(result);
     input = PyBytes_AS_STRING(input_obj);
 
@@ -1611,11 +1546,14 @@ bytes_translate(PyBytesObject *self, PyObject *args)
             if (Py_CHARMASK((*output++ = table[c])) != c)
                 changed = 1;
         }
-        if (changed || !PyBytes_CheckExact(input_obj))
-            return result;
-        Py_DECREF(result);
-        Py_INCREF(input_obj);
-        return input_obj;
+        if (!changed && PyBytes_CheckExact(input_obj)) {
+            Py_INCREF(input_obj);
+            Py_DECREF(result);
+            result = input_obj;
+        }
+        PyBuffer_Release(&del_table_view);
+        PyBuffer_Release(&table_view);
+        return result;
     }
 
     if (table == NULL) {
@@ -1625,9 +1563,11 @@ bytes_translate(PyBytesObject *self, PyObject *args)
         for (i = 0; i < 256; i++)
             trans_table[i] = Py_CHARMASK(table[i]);
     }
+    PyBuffer_Release(&table_view);
 
     for (i = 0; i < dellen; i++)
         trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
+    PyBuffer_Release(&del_table_view);
 
     for (i = inlen; --i >= 0; ) {
         c = Py_CHARMASK(*input++);
@@ -2156,31 +2096,21 @@ given, only first count occurances are replaced.");
 static PyObject *
 bytes_replace(PyBytesObject *self, PyObject *args)
 {
+    PyObject *res;
+    Py_buffer old = {NULL, NULL};
+    Py_buffer new = {NULL, NULL};
     Py_ssize_t count = -1;
-    PyObject *from, *to;
-    const char *from_s, *to_s;
-    Py_ssize_t from_len, to_len;
 
-    if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
+    if (!PyArg_ParseTuple(args, "y*y*|n:replace", &old, &new, &count))
         return NULL;
 
-    if (PyBytes_Check(from)) {
-        from_s = PyBytes_AS_STRING(from);
-        from_len = PyBytes_GET_SIZE(from);
-    }
-    else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
-        return NULL;
-
-    if (PyBytes_Check(to)) {
-        to_s = PyBytes_AS_STRING(to);
-        to_len = PyBytes_GET_SIZE(to);
-    }
-    else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
-        return NULL;
+    res = (PyObject *)replace((PyBytesObject *) self,
+                              (const char *)old.buf, old.len,
+                              (const char *)new.buf, new.len, count);
 
-    return (PyObject *)replace((PyBytesObject *) self,
-                               from_s, from_len,
-                               to_s, to_len, count);
+    PyBuffer_Release(&old);
+    PyBuffer_Release(&new);
+    return res;
 }
 
 /** End DALKE **/
@@ -2195,6 +2125,7 @@ _bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
 {
     Py_ssize_t len = PyBytes_GET_SIZE(self);
     Py_ssize_t slen;
+    Py_buffer sub_view = {NULL, NULL};
     const char* sub;
     const char* str;
 
@@ -2202,8 +2133,12 @@ _bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
         sub = PyBytes_AS_STRING(substr);
         slen = PyBytes_GET_SIZE(substr);
     }
-    else if (PyObject_AsCharBuffer(substr, &sub, &slen))
-        return -1;
+    else {
+        if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
+            return -1;
+        sub = sub_view.buf;
+        slen = sub_view.len;
+    }
     str = PyBytes_AS_STRING(self);
 
     ADJUST_INDICES(start, end, len);
@@ -2211,17 +2146,25 @@ _bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start,
     if (direction < 0) {
         /* startswith */
         if (start+slen > len)
-            return 0;
+            goto notfound;
     } else {
         /* endswith */
         if (end-start < slen || start > len)
-            return 0;
+            goto notfound;
 
         if (end-slen > start)
             start = end - slen;
     }
-    if (end-start >= slen)
-        return ! memcmp(str+start, sub, slen);
+    if (end-start < slen)
+        goto notfound;
+    if (memcmp(str+start, sub, slen) != 0)
+        goto notfound;
+
+    PyBuffer_Release(&sub_view);
+    return 1;
+
+notfound:
+    PyBuffer_Release(&sub_view);
     return 0;
 }
 
@@ -2333,8 +2276,6 @@ bytes_decode(PyObject *self, PyObject *args, PyObject *kwargs)
 
     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
         return NULL;
-    if (encoding == NULL)
-        encoding = PyUnicode_GetDefaultEncoding();
     return PyUnicode_FromEncodedObject(self, encoding, errors);
 }
 
@@ -2435,18 +2376,6 @@ bytes_fromhex(PyObject *cls, PyObject *args)
     return NULL;
 }
 
-PyDoc_STRVAR(sizeof__doc__,
-"B.__sizeof__() -> size of B in memory, in bytes");
-
-static PyObject *
-bytes_sizeof(PyBytesObject *v)
-{
-    Py_ssize_t res;
-    res = PyBytesObject_SIZE + Py_SIZE(v) * Py_TYPE(v)->tp_itemsize;
-    return PyLong_FromSsize_t(res);
-}
-
-
 static PyObject *
 bytes_getnewargs(PyBytesObject *v)
 {
@@ -2464,7 +2393,7 @@ bytes_methods[] = {
     {"decode", (PyCFunction)bytes_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
     {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
      endswith__doc__},
-    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
+    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
      expandtabs__doc__},
     {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__},
     {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
@@ -2512,13 +2441,11 @@ bytes_methods[] = {
      translate__doc__},
     {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
     {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
-    {"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
-     sizeof__doc__},
     {NULL,     NULL}                         /* sentinel */
 };
 
 static PyObject *
-str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
 
 static PyObject *
 bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
@@ -2533,7 +2460,7 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     _Py_IDENTIFIER(__bytes__);
 
     if (type != &PyBytes_Type)
-        return str_subtype_new(type, args, kwds);
+        return bytes_subtype_new(type, args, kwds);
     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
                                      &encoding, &errors))
         return NULL;
@@ -2561,6 +2488,13 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
         return new;
     }
 
+    /* If it's not unicode, there can't be encoding or errors */
+    if (encoding != NULL || errors != NULL) {
+        PyErr_SetString(PyExc_TypeError,
+            "encoding or errors without a string argument");
+        return NULL;
+    }
+
     /* We'd like to call PyObject_Bytes here, but we need to check for an
        integer argument before deferring to PyBytes_FromObject, something
        PyObject_Bytes doesn't do. */
@@ -2602,13 +2536,6 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
         return new;
     }
 
-    /* If it's not unicode, there can't be encoding or errors */
-    if (encoding != NULL || errors != NULL) {
-        PyErr_SetString(PyExc_TypeError,
-            "encoding or errors without a string argument");
-        return NULL;
-    }
-
     return PyBytes_FromObject(x);
 }
 
@@ -2696,7 +2623,7 @@ PyBytes_FromObject(PyObject *x)
     }
 
     /* For iterator version, create a string object and resize as needed */
-    size = _PyObject_LengthHint(x, 64);
+    size = PyObject_LengthHint(x, 64);
     if (size == -1 && PyErr_Occurred())
         return NULL;
     /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
@@ -2754,14 +2681,13 @@ PyBytes_FromObject(PyObject *x)
     return new;
 
   error:
-    /* Error handling when new != NULL */
     Py_XDECREF(it);
-    Py_DECREF(new);
+    Py_XDECREF(new);
     return NULL;
 }
 
 static PyObject *
-str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
     PyObject *tmp, *pnew;
     Py_ssize_t n;
@@ -2770,7 +2696,7 @@ str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     tmp = bytes_new(&PyBytes_Type, args, kwds);
     if (tmp == NULL)
         return NULL;
-    assert(PyBytes_CheckExact(tmp));
+    assert(PyBytes_Check(tmp));
     n = PyBytes_GET_SIZE(tmp);
     pnew = type->tp_alloc(type, n);
     if (pnew != NULL) {
@@ -2842,9 +2768,9 @@ PyTypeObject PyBytes_Type = {
 };
 
 void
-PyBytes_Concat(register PyObject **pv, register PyObject *w)
+PyBytes_Concat(PyObject **pv, PyObject *w)
 {
-    register PyObject *v;
+    PyObject *v;
     assert(pv != NULL);
     if (*pv == NULL)
         return;
@@ -2858,7 +2784,7 @@ PyBytes_Concat(register PyObject **pv, register PyObject *w)
 }
 
 void
-PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
+PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
 {
     PyBytes_Concat(pv, w);
     Py_XDECREF(w);
@@ -2882,8 +2808,8 @@ PyBytes_ConcatAndDel(register PyObject **pv, register PyObject *w)
 int
 _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
 {
-    register PyObject *v;
-    register PyBytesObject *sv;
+    PyObject *v;
+    PyBytesObject *sv;
     v = *pv;
     if (!PyBytes_Check(v) || Py_REFCNT(v) != 1 || newsize < 0) {
         *pv = 0;
diff --git a/Objects/classobject.c b/Objects/classobject.c
index cdc9b1c..0c0bd47 100644
--- a/Objects/classobject.c
+++ b/Objects/classobject.c
@@ -44,7 +44,7 @@ PyMethod_Self(PyObject *im)
 PyObject *
 PyMethod_New(PyObject *func, PyObject *self)
 {
-    register PyMethodObject *im;
+    PyMethodObject *im;
     if (self == NULL) {
         PyErr_BadInternalCall();
         return NULL;
@@ -52,7 +52,7 @@ PyMethod_New(PyObject *func, PyObject *self)
     im = free_list;
     if (im != NULL) {
         free_list = (PyMethodObject *)(im->im_self);
-        PyObject_INIT(im, &PyMethod_Type);
+        (void)PyObject_INIT(im, &PyMethod_Type);
         numfree--;
     }
     else {
@@ -69,6 +69,30 @@ PyMethod_New(PyObject *func, PyObject *self)
     return (PyObject *)im;
 }
 
+static PyObject *
+method_reduce(PyMethodObject *im)
+{
+    /* Returns the tuple (getattr, (self, name)): the builtins entry for
+       "getattr", the bound self, and the __name__ attribute of the wrapped
+       function.  Returns NULL if the __name__ lookup fails.  "N" passes the
+       name reference on to Py_BuildValue() instead of adding a new one. */
+    _Py_IDENTIFIER(getattr);
+    PyObject *bound_self = PyMethod_GET_SELF(im);
+    PyObject *name, *getattr_func;
+
+    name = _PyObject_GetAttrId(PyMethod_GET_FUNCTION(im), &PyId___name__);
+    if (name == NULL)
+        return NULL;
+
+    getattr_func = _PyDict_GetItemId(PyEval_GetBuiltins(), &PyId_getattr);
+    return Py_BuildValue("O(ON)", getattr_func, bound_self, name);
+}
+
+static PyMethodDef method_methods[] = {    /* installed as tp_methods */
+    {"__reduce__", (PyCFunction)method_reduce, METH_NOARGS, NULL},
+    {NULL, NULL}    /* sentinel */
+};
+
 /* Descriptors for PyMethod attributes */
 
 /* im_func and im_self are stored in the PyMethod object */
@@ -164,7 +188,7 @@ method_new(PyTypeObject* type, PyObject* args, PyObject *kw)
 }
 
 static void
-method_dealloc(register PyMethodObject *im)
+method_dealloc(PyMethodObject *im)
 {
     _PyObject_GC_UNTRACK(im);
     if (im->im_weakreflist != NULL)
@@ -367,7 +391,7 @@ PyTypeObject PyMethod_Type = {
     offsetof(PyMethodObject, im_weakreflist), /* tp_weaklistoffset */
     0,                                          /* tp_iter */
     0,                                          /* tp_iternext */
-    0,                                          /* tp_methods */
+    method_methods,                             /* tp_methods */
     method_memberlist,                          /* tp_members */
     method_getset,                              /* tp_getset */
     0,                                          /* tp_base */
@@ -509,7 +533,7 @@ instancemethod_call(PyObject *self, PyObject *arg, PyObject *kw)
 
 static PyObject *
 instancemethod_descr_get(PyObject *descr, PyObject *obj, PyObject *type) {
-    register PyObject *func = PyInstanceMethod_GET_FUNCTION(descr);
+    PyObject *func = PyInstanceMethod_GET_FUNCTION(descr);
     if (obj == NULL) {
         Py_INCREF(func);
         return func;
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 9713f61..353f414 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -74,6 +74,11 @@ PyCode_New(int argcount, int kwonlyargcount,
         PyErr_BadInternalCall();
         return NULL;
     }
+
+    /* Ensure that the filename is a ready Unicode string */
+    if (PyUnicode_READY(filename) < 0)
+        return NULL;
+
     n_cellvars = PyTuple_GET_SIZE(cellvars);
     intern_strings(names);
     intern_strings(varnames);
diff --git a/Objects/complexobject.c b/Objects/complexobject.c
index b08aa6f..7aaaeab 100644
--- a/Objects/complexobject.c
+++ b/Objects/complexobject.c
@@ -78,7 +78,7 @@ c_quot(Py_complex a, Py_complex b)
      const double abs_breal = b.real < 0 ? -b.real : b.real;
      const double abs_bimag = b.imag < 0 ? -b.imag : b.imag;
 
-     if (abs_breal >= abs_bimag) {
+    if (abs_breal >= abs_bimag) {
         /* divide tops and bottom by b.real */
         if (abs_breal == 0.0) {
             errno = EDOM;
@@ -91,7 +91,7 @@ c_quot(Py_complex a, Py_complex b)
             r.imag = (a.imag - a.real * ratio) / denom;
         }
     }
-    else {
+    else if (abs_bimag >= abs_breal) {
         /* divide tops and bottom by b.imag */
         const double ratio = b.real / b.imag;
         const double denom = b.real * ratio + b.imag;
@@ -99,6 +99,10 @@ c_quot(Py_complex a, Py_complex b)
         r.real = (a.real * ratio + a.imag) / denom;
         r.imag = (a.imag * ratio - a.real) / denom;
     }
+    else {
+        /* At least one of b.real or b.imag is a NaN */
+        r.real = r.imag = Py_NAN;
+    }
     return r;
 }
 
@@ -211,13 +215,13 @@ complex_subtype_from_c_complex(PyTypeObject *type, Py_complex cval)
 PyObject *
 PyComplex_FromCComplex(Py_complex cval)
 {
-    register PyComplexObject *op;
+    PyComplexObject *op;
 
     /* Inline PyObject_New */
     op = (PyComplexObject *) PyObject_MALLOC(sizeof(PyComplexObject));
     if (op == NULL)
         return PyErr_NoMemory();
-    PyObject_INIT(op, &PyComplex_Type);
+    (void)PyObject_INIT(op, &PyComplex_Type);
     op->cval = cval;
     return (PyObject *) op;
 }
@@ -271,6 +275,12 @@ try_complex_special_method(PyObject *op) {
     if (f) {
         PyObject *res = PyObject_CallFunctionObjArgs(f, NULL);
         Py_DECREF(f);
+        if (res != NULL && !PyComplex_Check(res)) {
+            PyErr_SetString(PyExc_TypeError,
+                "__complex__ should return a complex object");
+            Py_DECREF(res);
+            return NULL;
+        }
         return res;
     }
     return NULL;
@@ -296,12 +306,6 @@ PyComplex_AsCComplex(PyObject *op)
     newop = try_complex_special_method(op);
 
     if (newop) {
-        if (!PyComplex_Check(newop)) {
-            PyErr_SetString(PyExc_TypeError,
-                "__complex__ should return a complex object");
-            Py_DECREF(newop);
-            return cv;
-        }
         cv = ((PyComplexObject *)newop)->cval;
         Py_DECREF(newop);
         return cv;
@@ -705,7 +709,7 @@ complex__format__(PyObject* self, PyObject* args)
     if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
         return NULL;
 
-    _PyUnicodeWriter_Init(&writer, 0);
+    _PyUnicodeWriter_Init(&writer);
     ret = _PyComplex_FormatAdvancedWriter(
         &writer,
         self,
@@ -772,9 +776,10 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
         if (s == NULL)
             goto error;
     }
-    else if (PyObject_AsCharBuffer(v, &s, &len)) {
-        PyErr_SetString(PyExc_TypeError,
-                        "complex() argument must be a string or a number");
+    else {
+        PyErr_Format(PyExc_TypeError,
+            "complex() argument must be a string or a number, not '%.200s'",
+            Py_TYPE(v)->tp_name);
         return NULL;
     }
 
@@ -953,8 +958,13 @@ complex_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
         nbi = i->ob_type->tp_as_number;
     if (nbr == NULL || nbr->nb_float == NULL ||
         ((i != NULL) && (nbi == NULL || nbi->nb_float == NULL))) {
-        PyErr_SetString(PyExc_TypeError,
-                   "complex() argument must be a string or a number");
+        /* Report the argument that actually failed the check above: r when
+           the nbr test tripped, otherwise i (non-NULL on this path, because
+           only the second half of the condition can have been true). */
+        PyObject *bad = (nbr == NULL || nbr->nb_float == NULL) ? r : i;
+        PyErr_Format(PyExc_TypeError,
+            "complex() argument must be a string or a number, not '%.200s'",
+            Py_TYPE(bad)->tp_name);
         if (own_r) {
             Py_DECREF(r);
         }
diff --git a/Objects/descrobject.c b/Objects/descrobject.c
index ba98a30..615f499 100644
--- a/Objects/descrobject.c
+++ b/Objects/descrobject.c
@@ -115,7 +115,7 @@ classmethod_get(PyMethodDescrObject *descr, PyObject *obj, PyObject *type)
                      ((PyTypeObject *)type)->tp_name);
         return NULL;
     }
-    return PyCFunction_New(descr->d_method, type);
+    return PyCFunction_NewEx(descr->d_method, type, NULL);
 }
 
 static PyObject *
@@ -125,7 +125,7 @@ method_get(PyMethodDescrObject *descr, PyObject *obj, PyObject *type)
 
     if (descr_check((PyDescrObject *)descr, obj, &res))
         return res;
-    return PyCFunction_New(descr->d_method, obj);
+    return PyCFunction_NewEx(descr->d_method, obj, NULL);
 }
 
 static PyObject *
@@ -239,7 +239,7 @@ methoddescr_call(PyMethodDescrObject *descr, PyObject *args, PyObject *kwds)
         return NULL;
     }
 
-    func = PyCFunction_New(descr->d_method, self);
+    func = PyCFunction_NewEx(descr->d_method, self, NULL);
     if (func == NULL)
         return NULL;
     args = PyTuple_GetSlice(args, 1, argc);
@@ -292,7 +292,7 @@ classmethoddescr_call(PyMethodDescrObject *descr, PyObject *args,
         return NULL;
     }
 
-    func = PyCFunction_New(descr->d_method, self);
+    func = PyCFunction_NewEx(descr->d_method, self, NULL);
     if (func == NULL)
         return NULL;
     args = PyTuple_GetSlice(args, 1, argc);
@@ -353,11 +353,13 @@ wrapperdescr_call(PyWrapperDescrObject *descr, PyObject *args, PyObject *kwds)
 static PyObject *
 method_get_doc(PyMethodDescrObject *descr, void *closure)
 {
-    if (descr->d_method->ml_doc == NULL) {
-        Py_INCREF(Py_None);
-        return Py_None;
-    }
-    return PyUnicode_FromString(descr->d_method->ml_doc);
+    return _PyType_GetDocFromInternalDoc(descr->d_method->ml_name, descr->d_method->ml_doc);
+}
+
+static PyObject *
+method_get_text_signature(PyMethodDescrObject *descr, void *closure)
+{
+    return _PyType_GetTextSignatureFromInternalDoc(descr->d_method->ml_name, descr->d_method->ml_doc);
 }
 
 static PyObject *
@@ -398,6 +400,24 @@ descr_get_qualname(PyDescrObject *descr)
     return descr->d_qualname;
 }
 
+static PyObject *
+descr_reduce(PyDescrObject *descr)
+{
+    PyObject *builtins;
+    PyObject *getattr;
+    _Py_IDENTIFIER(getattr);
+
+    builtins = PyEval_GetBuiltins();
+    getattr = _PyDict_GetItemId(builtins, &PyId_getattr);
+    return Py_BuildValue("O(OO)", getattr, PyDescr_TYPE(descr),
+                         PyDescr_NAME(descr));
+}
+
+static PyMethodDef descr_methods[] = {
+    {"__reduce__", (PyCFunction)descr_reduce, METH_NOARGS, NULL},
+    {NULL, NULL}
+};
+
 static PyMemberDef descr_members[] = {
     {"__objclass__", T_OBJECT, offsetof(PyDescrObject, d_type), READONLY},
     {"__name__", T_OBJECT, offsetof(PyDescrObject, d_name), READONLY},
@@ -407,6 +427,7 @@ static PyMemberDef descr_members[] = {
 static PyGetSetDef method_getset[] = {
     {"__doc__", (getter)method_get_doc},
     {"__qualname__", (getter)descr_get_qualname},
+    {"__text_signature__", (getter)method_get_text_signature},
     {0}
 };
 
@@ -445,16 +466,19 @@ static PyGetSetDef getset_getset[] = {
 static PyObject *
 wrapperdescr_get_doc(PyWrapperDescrObject *descr, void *closure)
 {
-    if (descr->d_base->doc == NULL) {
-        Py_INCREF(Py_None);
-        return Py_None;
-    }
-    return PyUnicode_FromString(descr->d_base->doc);
+    return _PyType_GetDocFromInternalDoc(descr->d_base->name, descr->d_base->doc);
+}
+
+static PyObject *
+wrapperdescr_get_text_signature(PyWrapperDescrObject *descr, void *closure)
+{
+    return _PyType_GetTextSignatureFromInternalDoc(descr->d_base->name, descr->d_base->doc);
 }
 
 static PyGetSetDef wrapperdescr_getset[] = {
     {"__doc__", (getter)wrapperdescr_get_doc},
     {"__qualname__", (getter)descr_get_qualname},
+    {"__text_signature__", (getter)wrapperdescr_get_text_signature},
     {0}
 };
 
@@ -494,7 +518,7 @@ PyTypeObject PyMethodDescr_Type = {
     0,                                          /* tp_weaklistoffset */
     0,                                          /* tp_iter */
     0,                                          /* tp_iternext */
-    0,                                          /* tp_methods */
+    descr_methods,                              /* tp_methods */
     descr_members,                              /* tp_members */
     method_getset,                              /* tp_getset */
     0,                                          /* tp_base */
@@ -532,7 +556,7 @@ PyTypeObject PyClassMethodDescr_Type = {
     0,                                          /* tp_weaklistoffset */
     0,                                          /* tp_iter */
     0,                                          /* tp_iternext */
-    0,                                          /* tp_methods */
+    descr_methods,                              /* tp_methods */
     descr_members,                              /* tp_members */
     method_getset,                              /* tp_getset */
     0,                                          /* tp_base */
@@ -569,7 +593,7 @@ PyTypeObject PyMemberDescr_Type = {
     0,                                          /* tp_weaklistoffset */
     0,                                          /* tp_iter */
     0,                                          /* tp_iternext */
-    0,                                          /* tp_methods */
+    descr_methods,                              /* tp_methods */
     descr_members,                              /* tp_members */
     member_getset,                              /* tp_getset */
     0,                                          /* tp_base */
@@ -643,7 +667,7 @@ PyTypeObject PyWrapperDescr_Type = {
     0,                                          /* tp_weaklistoffset */
     0,                                          /* tp_iter */
     0,                                          /* tp_iternext */
-    0,                                          /* tp_methods */
+    descr_methods,                              /* tp_methods */
     descr_members,                              /* tp_members */
     wrapperdescr_getset,                        /* tp_getset */
     0,                                          /* tp_base */
@@ -1009,7 +1033,7 @@ wrapper_dealloc(wrapperobject *wp)
 static PyObject *
 wrapper_richcompare(PyObject *a, PyObject *b, int op)
 {
-    int result;
+    Py_intptr_t result;
     PyObject *v;
     PyWrapperDescrObject *a_descr, *b_descr;
 
@@ -1085,6 +1109,23 @@ wrapper_repr(wrapperobject *wp)
                                wp->self);
 }
 
+static PyObject *
+wrapper_reduce(wrapperobject *wp)
+{
+    PyObject *builtins;
+    PyObject *getattr;
+    _Py_IDENTIFIER(getattr);
+
+    builtins = PyEval_GetBuiltins();
+    getattr = _PyDict_GetItemId(builtins, &PyId_getattr);
+    return Py_BuildValue("O(OO)", getattr, wp->self, PyDescr_NAME(wp->descr));
+}
+
+static PyMethodDef wrapper_methods[] = {
+    {"__reduce__", (PyCFunction)wrapper_reduce, METH_NOARGS, NULL},
+    {NULL, NULL}
+};
+
 static PyMemberDef wrapper_members[] = {
     {"__self__", T_OBJECT, offsetof(wrapperobject, self), READONLY},
     {0}
@@ -1108,17 +1149,15 @@ wrapper_name(wrapperobject *wp)
 }
 
 static PyObject *
-wrapper_doc(wrapperobject *wp)
+wrapper_doc(wrapperobject *wp, void *closure)
 {
-    const char *s = wp->descr->d_base->doc;
+    return _PyType_GetDocFromInternalDoc(wp->descr->d_base->name, wp->descr->d_base->doc);
+}
 
-    if (s == NULL) {
-        Py_INCREF(Py_None);
-        return Py_None;
-    }
-    else {
-        return PyUnicode_FromString(s);
-    }
+static PyObject *
+wrapper_text_signature(wrapperobject *wp, void *closure)
+{
+    return _PyType_GetTextSignatureFromInternalDoc(wp->descr->d_base->name, wp->descr->d_base->doc);
 }
 
 static PyObject *
@@ -1132,6 +1171,7 @@ static PyGetSetDef wrapper_getsets[] = {
     {"__name__", (getter)wrapper_name},
     {"__qualname__", (getter)wrapper_qualname},
     {"__doc__", (getter)wrapper_doc},
+    {"__text_signature__", (getter)wrapper_text_signature},
     {0}
 };
 
@@ -1193,7 +1233,7 @@ PyTypeObject _PyMethodWrapper_Type = {
     0,                                          /* tp_weaklistoffset */
     0,                                          /* tp_iter */
     0,                                          /* tp_iternext */
-    0,                                          /* tp_methods */
+    wrapper_methods,                            /* tp_methods */
     wrapper_members,                            /* tp_members */
     wrapper_getsets,                            /* tp_getset */
     0,                                          /* tp_base */
@@ -1228,11 +1268,11 @@ PyWrapper_New(PyObject *d, PyObject *self)
 /* A built-in 'property' type */
 
 /*
-    class property(object):
+class property(object):
 
     def __init__(self, fget=None, fset=None, fdel=None, doc=None):
         if doc is None and fget is not None and hasattr(fget, "__doc__"):
-        doc = fget.__doc__
+            doc = fget.__doc__
         self.__get = fget
         self.__set = fset
         self.__del = fdel
@@ -1240,19 +1280,19 @@ PyWrapper_New(PyObject *d, PyObject *self)
 
     def __get__(self, inst, type=None):
         if inst is None:
-        return self
+            return self
         if self.__get is None:
-        raise AttributeError, "unreadable attribute"
+            raise AttributeError, "unreadable attribute"
         return self.__get(inst)
 
     def __set__(self, inst, value):
         if self.__set is None:
-        raise AttributeError, "can't set attribute"
+            raise AttributeError, "can't set attribute"
         return self.__set(inst, value)
 
     def __delete__(self, inst):
         if self.__del is None:
-        raise AttributeError, "can't delete attribute"
+            raise AttributeError, "can't delete attribute"
         return self.__del(inst)
 
 */
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 953484c..a494d6b 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -69,6 +69,11 @@ to the combined-table form.
 #include "Python.h"
 #include "stringlib/eq.h"
 
+/*[clinic input]
+class dict "PyDictObject *" "&PyDict_Type"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=f157a5a0ce9589d6]*/
+
 typedef struct {
     /* Cached hash code of me_key. */
     Py_hash_t me_hash;
@@ -95,20 +100,6 @@ To avoid slowing down lookups on a near-full table, we resize the table when
 it's USABLE_FRACTION (currently two-thirds) full.
 */
 
-/* Set a key error with the specified argument, wrapping it in a
- * tuple automatically so that tuple keys are not unpacked as the
- * exception arguments. */
-static void
-set_key_error(PyObject *arg)
-{
-    PyObject *tup;
-    tup = PyTuple_Pack(1, arg);
-    if (!tup)
-        return; /* caller will expect error to be set anyway */
-    PyErr_SetObject(PyExc_KeyError, tup);
-    Py_DECREF(tup);
-}
-
 #define PERTURB_SHIFT 5
 
 /*
@@ -305,9 +296,9 @@ PyDict_Fini(void)
  * #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3))
 */
 
-/* GROWTH_RATE. Growth rate upon hitting maximum load. 
- * Currently set to used*2 + capacity/2. 
- * This means that dicts double in size when growing without deletions, 
+/* GROWTH_RATE. Growth rate upon hitting maximum load.
+ * Currently set to used*2 + capacity/2.
+ * This means that dicts double in size when growing without deletions,
  * but have more head room when the number of deletions is on a par with the
  * number of insertions.
  * Raising this to used*4 doubles memory consumption depending on the size of
@@ -389,6 +380,7 @@ static PyObject *
 new_dict(PyDictKeysObject *keys, PyObject **values)
 {
     PyDictObject *mp;
+    assert(keys != NULL);
     if (numfree) {
         mp = free_list[--numfree];
         assert (mp != NULL);
@@ -431,7 +423,10 @@ new_dict_with_shared_keys(PyDictKeysObject *keys)
 PyObject *
 PyDict_New(void)
 {
-    return new_dict(new_keys_object(PyDict_MINSIZE_COMBINED), NULL);
+    PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE_COMBINED);
+    if (keys == NULL)
+        return NULL;
+    return new_dict(keys, NULL);
 }
 
 /*
@@ -463,13 +458,13 @@ static PyDictKeyEntry *
 lookdict(PyDictObject *mp, PyObject *key,
          Py_hash_t hash, PyObject ***value_addr)
 {
-    register size_t i;
-    register size_t perturb;
-    register PyDictKeyEntry *freeslot;
-    register size_t mask;
+    size_t i;
+    size_t perturb;
+    PyDictKeyEntry *freeslot;
+    size_t mask;
     PyDictKeyEntry *ep0;
-    register PyDictKeyEntry *ep;
-    register int cmp;
+    PyDictKeyEntry *ep;
+    int cmp;
     PyObject *startkey;
 
 top:
@@ -555,12 +550,12 @@ static PyDictKeyEntry *
 lookdict_unicode(PyDictObject *mp, PyObject *key,
                  Py_hash_t hash, PyObject ***value_addr)
 {
-    register size_t i;
-    register size_t perturb;
-    register PyDictKeyEntry *freeslot;
-    register size_t mask = DK_MASK(mp->ma_keys);
+    size_t i;
+    size_t perturb;
+    PyDictKeyEntry *freeslot;
+    size_t mask = DK_MASK(mp->ma_keys);
     PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
-    register PyDictKeyEntry *ep;
+    PyDictKeyEntry *ep;
 
     /* Make sure this function doesn't have to handle non-unicode keys,
        including subclasses of str; e.g., one reason to subclass
@@ -620,11 +615,11 @@ static PyDictKeyEntry *
 lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
                          Py_hash_t hash, PyObject ***value_addr)
 {
-    register size_t i;
-    register size_t perturb;
-    register size_t mask = DK_MASK(mp->ma_keys);
+    size_t i;
+    size_t perturb;
+    size_t mask = DK_MASK(mp->ma_keys);
     PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
-    register PyDictKeyEntry *ep;
+    PyDictKeyEntry *ep;
 
     /* Make sure this function doesn't have to handle non-unicode keys,
        including subclasses of str; e.g., one reason to subclass
@@ -665,11 +660,11 @@ static PyDictKeyEntry *
 lookdict_split(PyDictObject *mp, PyObject *key,
                Py_hash_t hash, PyObject ***value_addr)
 {
-    register size_t i;
-    register size_t perturb;
-    register size_t mask = DK_MASK(mp->ma_keys);
+    size_t i;
+    size_t perturb;
+    size_t mask = DK_MASK(mp->ma_keys);
     PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
-    register PyDictKeyEntry *ep;
+    PyDictKeyEntry *ep;
 
     if (!PyUnicode_CheckExact(key)) {
         ep = lookdict(mp, key, hash, value_addr);
@@ -819,13 +814,14 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
     if (ep == NULL) {
         return -1;
     }
+    assert(PyUnicode_CheckExact(key) || mp->ma_keys->dk_lookup == lookdict);
     Py_INCREF(value);
     MAINTAIN_TRACKING(mp, key, value);
     old_value = *value_addr;
     if (old_value != NULL) {
         assert(ep->me_key != NULL && ep->me_key != dummy);
         *value_addr = value;
-        Py_DECREF(old_value); /* which **CAN** re-enter */
+        Py_DECREF(old_value); /* which **CAN** re-enter (see issue #22653) */
     }
     else {
         if (ep->me_key == NULL) {
@@ -856,9 +852,8 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
         }
         mp->ma_used++;
         *value_addr = value;
+        assert(ep->me_key != NULL && ep->me_key != dummy);
     }
-    assert(ep->me_key != NULL && ep->me_key != dummy);
-    assert(PyUnicode_CheckExact(key) || mp->ma_keys->dk_lookup == lookdict);
     return 0;
 }
 
@@ -1237,7 +1232,7 @@ PyDict_DelItem(PyObject *op, PyObject *key)
     if (ep == NULL)
         return -1;
     if (*value_addr == NULL) {
-        set_key_error(key);
+        _PyErr_SetKeyError(key);
         return -1;
     }
     old_value = *value_addr;
@@ -1391,7 +1386,7 @@ dict_dealloc(PyDictObject *mp)
         }
         DK_DECREF(keys);
     }
-    else {
+    else if (keys != NULL) {
         assert(keys->dk_refcnt == 1);
         DK_DECREF(keys);
     }
@@ -1407,9 +1402,9 @@ static PyObject *
 dict_repr(PyDictObject *mp)
 {
     Py_ssize_t i;
-    PyObject *s, *temp, *colon = NULL;
-    PyObject *pieces = NULL, *result = NULL;
-    PyObject *key, *value;
+    PyObject *key = NULL, *value = NULL;
+    _PyUnicodeWriter writer;
+    int first;
 
     i = Py_ReprEnter((PyObject *)mp);
     if (i != 0) {
@@ -1417,71 +1412,73 @@ dict_repr(PyDictObject *mp)
     }
 
     if (mp->ma_used == 0) {
-        result = PyUnicode_FromString("{}");
-        goto Done;
+        Py_ReprLeave((PyObject *)mp);
+        return PyUnicode_FromString("{}");
     }
 
-    pieces = PyList_New(0);
-    if (pieces == NULL)
-        goto Done;
+    _PyUnicodeWriter_Init(&writer);
+    writer.overallocate = 1;
+    /* "{" + "1: 2" + ", 3: 4" * (len - 1) + "}" */
+    writer.min_length = 1 + 4 + (2 + 4) * (mp->ma_used - 1) + 1;
 
-    colon = PyUnicode_FromString(": ");
-    if (colon == NULL)
-        goto Done;
+    if (_PyUnicodeWriter_WriteChar(&writer, '{') < 0)
+        goto error;
 
     /* Do repr() on each key+value pair, and insert ": " between them.
        Note that repr may mutate the dict. */
     i = 0;
+    first = 1;
     while (PyDict_Next((PyObject *)mp, &i, &key, &value)) {
-        int status;
+        PyObject *s;
+        int res;
+
         /* Prevent repr from deleting key or value during key format. */
         Py_INCREF(key);
         Py_INCREF(value);
+
+        if (!first) {
+            if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0)
+                goto error;
+        }
+        first = 0;
+
         s = PyObject_Repr(key);
-        PyUnicode_Append(&s, colon);
-        PyUnicode_AppendAndDel(&s, PyObject_Repr(value));
-        Py_DECREF(key);
-        Py_DECREF(value);
         if (s == NULL)
-            goto Done;
-        status = PyList_Append(pieces, s);
-        Py_DECREF(s);  /* append created a new ref */
-        if (status < 0)
-            goto Done;
-    }
-
-    /* Add "{}" decorations to the first and last items. */
-    assert(PyList_GET_SIZE(pieces) > 0);
-    s = PyUnicode_FromString("{");
-    if (s == NULL)
-        goto Done;
-    temp = PyList_GET_ITEM(pieces, 0);
-    PyUnicode_AppendAndDel(&s, temp);
-    PyList_SET_ITEM(pieces, 0, s);
-    if (s == NULL)
-        goto Done;
-
-    s = PyUnicode_FromString("}");
-    if (s == NULL)
-        goto Done;
-    temp = PyList_GET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1);
-    PyUnicode_AppendAndDel(&temp, s);
-    PyList_SET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1, temp);
-    if (temp == NULL)
-        goto Done;
-
-    /* Paste them all together with ", " between. */
-    s = PyUnicode_FromString(", ");
-    if (s == NULL)
-        goto Done;
-    result = PyUnicode_Join(s, pieces);
-    Py_DECREF(s);
-
-Done:
-    Py_XDECREF(pieces);
-    Py_XDECREF(colon);
+            goto error;
+        res = _PyUnicodeWriter_WriteStr(&writer, s);
+        Py_DECREF(s);
+        if (res < 0)
+            goto error;
+
+        if (_PyUnicodeWriter_WriteASCIIString(&writer, ": ", 2) < 0)
+            goto error;
+
+        s = PyObject_Repr(value);
+        if (s == NULL)
+            goto error;
+        res = _PyUnicodeWriter_WriteStr(&writer, s);
+        Py_DECREF(s);
+        if (res < 0)
+            goto error;
+
+        Py_CLEAR(key);
+        Py_CLEAR(value);
+    }
+
+    writer.overallocate = 0;
+    if (_PyUnicodeWriter_WriteChar(&writer, '}') < 0)
+        goto error;
+
     Py_ReprLeave((PyObject *)mp);
-    return result;
+
+    return _PyUnicodeWriter_Finish(&writer);
+
+error:
+    Py_ReprLeave((PyObject *)mp);
+    _PyUnicodeWriter_Dealloc(&writer);
+    Py_XDECREF(key);
+    Py_XDECREF(value);
+    return NULL;
 }
 
 static Py_ssize_t
@@ -1491,7 +1488,7 @@ dict_length(PyDictObject *mp)
 }
 
 static PyObject *
-dict_subscript(PyDictObject *mp, register PyObject *key)
+dict_subscript(PyDictObject *mp, PyObject *key)
 {
     PyObject *v;
     Py_hash_t hash;
@@ -1523,7 +1520,7 @@ dict_subscript(PyDictObject *mp, register PyObject *key)
             else if (PyErr_Occurred())
                 return NULL;
         }
-        set_key_error(key);
+        _PyErr_SetKeyError(key);
         return NULL;
     }
     else
@@ -1547,10 +1544,10 @@ static PyMappingMethods dict_as_mapping = {
 };
 
 static PyObject *
-dict_keys(register PyDictObject *mp)
+dict_keys(PyDictObject *mp)
 {
-    register PyObject *v;
-    register Py_ssize_t i, j;
+    PyObject *v;
+    Py_ssize_t i, j;
     PyDictKeyEntry *ep;
     Py_ssize_t size, n, offset;
     PyObject **value_ptr;
@@ -1591,10 +1588,10 @@ dict_keys(register PyDictObject *mp)
 }
 
 static PyObject *
-dict_values(register PyDictObject *mp)
+dict_values(PyDictObject *mp)
 {
-    register PyObject *v;
-    register Py_ssize_t i, j;
+    PyObject *v;
+    Py_ssize_t i, j;
     Py_ssize_t size, n, offset;
     PyObject **value_ptr;
 
@@ -1633,10 +1630,10 @@ dict_values(register PyDictObject *mp)
 }
 
 static PyObject *
-dict_items(register PyDictObject *mp)
+dict_items(PyDictObject *mp)
 {
-    register PyObject *v;
-    register Py_ssize_t i, j, n;
+    PyObject *v;
+    Py_ssize_t i, j, n;
     Py_ssize_t size, offset;
     PyObject *item, *key;
     PyDictKeyEntry *ep;
@@ -1694,37 +1691,72 @@ dict_items(register PyDictObject *mp)
     return v;
 }
 
+/*[clinic input]
+@classmethod
+dict.fromkeys
+    iterable: object
+    value: object=None
+    /
+
+Returns a new dict with keys from iterable and values equal to value.
+[clinic start generated code]*/
+
+PyDoc_STRVAR(dict_fromkeys__doc__,
+"fromkeys($type, iterable, value=None, /)\n"
+"--\n"
+"\n"
+"Returns a new dict with keys from iterable and values equal to value.");
+
+#define DICT_FROMKEYS_METHODDEF    \
+    {"fromkeys", (PyCFunction)dict_fromkeys, METH_VARARGS|METH_CLASS, dict_fromkeys__doc__},
+
 static PyObject *
-dict_fromkeys(PyObject *cls, PyObject *args)
+dict_fromkeys_impl(PyTypeObject *type, PyObject *iterable, PyObject *value);
+
+static PyObject *
+dict_fromkeys(PyTypeObject *type, PyObject *args)
 {
-    PyObject *seq;
+    PyObject *return_value = NULL;
+    PyObject *iterable;
     PyObject *value = Py_None;
+
+    if (!PyArg_UnpackTuple(args, "fromkeys",
+        1, 2,
+        &iterable, &value))
+        goto exit;
+    return_value = dict_fromkeys_impl(type, iterable, value);
+
+exit:
+    return return_value;
+}
+
+static PyObject *
+dict_fromkeys_impl(PyTypeObject *type, PyObject *iterable, PyObject *value)
+/*[clinic end generated code: output=55f8dc0ffa87406f input=b85a667f9bf4669d]*/
+{
-    PyObject *it;       /* iter(seq) */
+    PyObject *it;       /* iter(iterable) */
     PyObject *key;
     PyObject *d;
     int status;
 
-    if (!PyArg_UnpackTuple(args, "fromkeys", 1, 2, &seq, &value))
-        return NULL;
-
-    d = PyObject_CallObject(cls, NULL);
+    d = PyObject_CallObject((PyObject *)type, NULL);
     if (d == NULL)
         return NULL;
 
     if (PyDict_CheckExact(d) && ((PyDictObject *)d)->ma_used == 0) {
-        if (PyDict_CheckExact(seq)) {
+        if (PyDict_CheckExact(iterable)) {
             PyDictObject *mp = (PyDictObject *)d;
             PyObject *oldvalue;
             Py_ssize_t pos = 0;
             PyObject *key;
             Py_hash_t hash;
 
-            if (dictresize(mp, Py_SIZE(seq))) {
+            if (dictresize(mp, Py_SIZE(iterable))) {
                 Py_DECREF(d);
                 return NULL;
             }
 
-            while (_PyDict_Next(seq, &pos, &key, &oldvalue, &hash)) {
+            while (_PyDict_Next(iterable, &pos, &key, &oldvalue, &hash)) {
                 if (insertdict(mp, key, hash, value)) {
                     Py_DECREF(d);
                     return NULL;
@@ -1732,18 +1764,18 @@ dict_fromkeys(PyObject *cls, PyObject *args)
             }
             return d;
         }
-        if (PyAnySet_CheckExact(seq)) {
+        if (PyAnySet_CheckExact(iterable)) {
             PyDictObject *mp = (PyDictObject *)d;
             Py_ssize_t pos = 0;
             PyObject *key;
             Py_hash_t hash;
 
-            if (dictresize(mp, PySet_GET_SIZE(seq))) {
+            if (dictresize(mp, PySet_GET_SIZE(iterable))) {
                 Py_DECREF(d);
                 return NULL;
             }
 
-            while (_PySet_NextEntry(seq, &pos, &key, &hash)) {
+            while (_PySet_NextEntry(iterable, &pos, &key, &hash)) {
                 if (insertdict(mp, key, hash, value)) {
                     Py_DECREF(d);
                     return NULL;
@@ -1753,7 +1785,7 @@ dict_fromkeys(PyObject *cls, PyObject *args)
         }
     }
 
-    it = PyObject_GetIter(seq);
+    it = PyObject_GetIter(iterable);
     if (it == NULL){
         Py_DECREF(d);
         return NULL;
@@ -1908,8 +1940,8 @@ PyDict_Update(PyObject *a, PyObject *b)
 int
 PyDict_Merge(PyObject *a, PyObject *b, int override)
 {
-    register PyDictObject *mp, *other;
-    register Py_ssize_t i, n;
+    PyDictObject *mp, *other;
+    Py_ssize_t i, n;
     PyDictKeyEntry *entry;
 
     /* We accept for the argument either a concrete dictionary object,
@@ -2018,7 +2050,7 @@ PyDict_Merge(PyObject *a, PyObject *b, int override)
 }
 
 static PyObject *
-dict_copy(register PyDictObject *mp)
+dict_copy(PyDictObject *mp)
 {
     return PyDict_Copy((PyObject*)mp);
 }
@@ -2130,13 +2162,18 @@ dict_equal(PyDictObject *a, PyDictObject *b)
         if (aval != NULL) {
             int cmp;
             PyObject *bval;
+            PyObject **vaddr;
             PyObject *key = ep->me_key;
             /* temporarily bump aval's refcount to ensure it stays
                alive until we're done with it */
             Py_INCREF(aval);
             /* ditto for key */
             Py_INCREF(key);
-            bval = PyDict_GetItemWithError((PyObject *)b, key);
+            /* reuse the known hash value */
+            if ((b->ma_keys->dk_lookup)(b, key, ep->me_hash, &vaddr) == NULL)
+                bval = NULL;
+            else
+                bval = *vaddr;
             Py_DECREF(key);
             if (bval == NULL) {
                 Py_DECREF(aval);
@@ -2174,9 +2211,31 @@ dict_richcompare(PyObject *v, PyObject *w, int op)
     return res;
 }
 
+/*[clinic input]
+
+@coexist
+dict.__contains__
+
+  key: object
+  /
+
+True if D has a key k, else False.
+[clinic start generated code]*/
+
+PyDoc_STRVAR(dict___contains____doc__,
+"__contains__($self, key, /)\n"
+"--\n"
+"\n"
+"True if D has a key k, else False.");
+
+#define DICT___CONTAINS___METHODDEF    \
+    {"__contains__", (PyCFunction)dict___contains__, METH_O|METH_COEXIST, dict___contains____doc__},
+
 static PyObject *
-dict_contains(register PyDictObject *mp, PyObject *key)
+dict___contains__(PyDictObject *self, PyObject *key)
+/*[clinic end generated code: output=3cf3f8aaf2cc5cc3 input=b852b2a19b51ab24]*/
 {
+    PyDictObject *mp = self;
     Py_hash_t hash;
     PyDictKeyEntry *ep;
     PyObject **value_addr;
@@ -2194,7 +2253,7 @@ dict_contains(register PyDictObject *mp, PyObject *key)
 }
 
 static PyObject *
-dict_get(register PyDictObject *mp, PyObject *args)
+dict_get(PyDictObject *mp, PyObject *args)
 {
     PyObject *key;
     PyObject *failobj = Py_None;
@@ -2222,19 +2281,19 @@ dict_get(register PyDictObject *mp, PyObject *args)
     return val;
 }
 
-static PyObject *
-dict_setdefault(register PyDictObject *mp, PyObject *args)
+PyObject *
+PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
 {
-    PyObject *key;
-    PyObject *failobj = Py_None;
+    PyDictObject *mp = (PyDictObject *)d;
     PyObject *val = NULL;
     Py_hash_t hash;
     PyDictKeyEntry *ep;
     PyObject **value_addr;
 
-    if (!PyArg_UnpackTuple(args, "setdefault", 1, 2, &key, &failobj))
+    if (!PyDict_Check(d)) {
+        PyErr_BadInternalCall();
         return NULL;
-
+    }
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
         hash = PyObject_Hash(key);
@@ -2252,23 +2311,35 @@ dict_setdefault(register PyDictObject *mp, PyObject *args)
                 return NULL;
             ep = find_empty_slot(mp, key, hash, &value_addr);
         }
-        Py_INCREF(failobj);
+        Py_INCREF(defaultobj);
         Py_INCREF(key);
-        MAINTAIN_TRACKING(mp, key, failobj);
+        MAINTAIN_TRACKING(mp, key, defaultobj);
         ep->me_key = key;
         ep->me_hash = hash;
-        *value_addr = failobj;
-        val = failobj;
+        *value_addr = defaultobj;
+        val = defaultobj;
         mp->ma_keys->dk_usable--;
         mp->ma_used++;
     }
-    Py_INCREF(val);
     return val;
 }
 
+static PyObject *
+dict_setdefault(PyDictObject *mp, PyObject *args)
+{
+    PyObject *key, *val;
+    PyObject *defaultobj = Py_None;
+
+    if (!PyArg_UnpackTuple(args, "setdefault", 1, 2, &key, &defaultobj))
+        return NULL;
+
+    val = PyDict_SetDefault((PyObject *)mp, key, defaultobj);
+    Py_XINCREF(val);
+    return val;
+}
 
 static PyObject *
-dict_clear(register PyDictObject *mp)
+dict_clear(PyDictObject *mp)
 {
     PyDict_Clear((PyObject *)mp);
     Py_RETURN_NONE;
@@ -2290,7 +2361,7 @@ dict_pop(PyDictObject *mp, PyObject *args)
             Py_INCREF(deflt);
             return deflt;
         }
-        set_key_error(key);
+        _PyErr_SetKeyError(key);
         return NULL;
     }
     if (!PyUnicode_CheckExact(key) ||
@@ -2308,7 +2379,7 @@ dict_pop(PyDictObject *mp, PyObject *args)
             Py_INCREF(deflt);
             return deflt;
         }
-        set_key_error(key);
+        _PyErr_SetKeyError(key);
         return NULL;
     }
     *value_addr = NULL;
@@ -2449,9 +2520,6 @@ _PyDict_KeysSize(PyDictKeysObject *keys)
     return sizeof(PyDictKeysObject) + (DK_SIZE(keys)-1) * sizeof(PyDictKeyEntry);
 }
 
-PyDoc_STRVAR(contains__doc__,
-"D.__contains__(k) -> True if D has a key k, else False");
-
 PyDoc_STRVAR(getitem__doc__, "x.__getitem__(y) <==> x[y]");
 
 PyDoc_STRVAR(sizeof__doc__,
@@ -2472,14 +2540,10 @@ PyDoc_STRVAR(popitem__doc__,
 2-tuple; but raise KeyError if D is empty.");
 
 PyDoc_STRVAR(update__doc__,
-"D.update([E, ]**F) -> None.  Update D from dict/iterable E and F.\n"
-"If E present and has a .keys() method, does:     for k in E: D[k] = E[k]\n\
-If E present and lacks .keys() method, does:     for (k, v) in E: D[k] = v\n\
-In either case, this is followed by: for k in F: D[k] = F[k]");
-
-PyDoc_STRVAR(fromkeys__doc__,
-"dict.fromkeys(S[,v]) -> New dict with keys from S and values equal to v.\n\
-v defaults to None.");
+"D.update([E, ]**F) -> None.  Update D from dict/iterable E and F.\n\
+If E is present and has a .keys() method, then does:  for k in E: D[k] = E[k]\n\
+If E is present and lacks a .keys() method, then does:  for k, v in E: D[k] = v\n\
+In either case, this is followed by: for k in F:  D[k] = F[k]");
 
 PyDoc_STRVAR(clear__doc__,
 "D.clear() -> None.  Remove all items from D.");
@@ -2500,8 +2564,7 @@ PyDoc_STRVAR(values__doc__,
              "D.values() -> an object providing a view on D's values");
 
 static PyMethodDef mapp_methods[] = {
-    {"__contains__",(PyCFunction)dict_contains,     METH_O | METH_COEXIST,
-     contains__doc__},
+    DICT___CONTAINS___METHODDEF
     {"__getitem__", (PyCFunction)dict_subscript,        METH_O | METH_COEXIST,
      getitem__doc__},
     {"__sizeof__",      (PyCFunction)dict_sizeof,       METH_NOARGS,
@@ -2522,8 +2585,7 @@ static PyMethodDef mapp_methods[] = {
     values__doc__},
     {"update",          (PyCFunction)dict_update,       METH_VARARGS | METH_KEYWORDS,
      update__doc__},
-    {"fromkeys",        (PyCFunction)dict_fromkeys,     METH_VARARGS | METH_CLASS,
-     fromkeys__doc__},
+    DICT_FROMKEYS_METHODDEF
     {"clear",           (PyCFunction)dict_clear,        METH_NOARGS,
      clear__doc__},
     {"copy",            (PyCFunction)dict_copy,         METH_NOARGS,
@@ -2580,22 +2642,23 @@ static PyObject *
 dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
     PyObject *self;
+    PyDictObject *d;
 
     assert(type != NULL && type->tp_alloc != NULL);
     self = type->tp_alloc(type, 0);
-    if (self != NULL) {
-        PyDictObject *d = (PyDictObject *)self;
-        d->ma_keys = new_keys_object(PyDict_MINSIZE_COMBINED);
-        /* XXX - Should we raise a no-memory error? */
-        if (d->ma_keys == NULL) {
-            DK_INCREF(Py_EMPTY_KEYS);
-            d->ma_keys = Py_EMPTY_KEYS;
-            d->ma_values = empty_values;
-        }
-        d->ma_used = 0;
-        /* The object has been implicitly tracked by tp_alloc */
-        if (type == &PyDict_Type)
-            _PyObject_GC_UNTRACK(d);
+    if (self == NULL)
+        return NULL;
+    d = (PyDictObject *)self;
+
+    /* The object has been implicitly tracked by tp_alloc */
+    if (type == &PyDict_Type)
+        _PyObject_GC_UNTRACK(d);
+
+    d->ma_used = 0;
+    d->ma_keys = new_keys_object(PyDict_MINSIZE_COMBINED);
+    if (d->ma_keys == NULL) {
+        Py_DECREF(self);
+        return NULL;
     }
     return self;
 }
@@ -2671,8 +2734,10 @@ _PyDict_GetItemId(PyObject *dp, struct _Py_Identifier *key)
 {
     PyObject *kv;
     kv = _PyUnicode_FromId(key); /* borrowed */
-    if (kv == NULL)
+    if (kv == NULL) {
+        PyErr_Clear();
         return NULL;
+    }
     return PyDict_GetItem(dp, kv);
 }
 
@@ -2683,8 +2748,10 @@ PyDict_GetItemString(PyObject *v, const char *key)
 {
     PyObject *kv, *rv;
     kv = PyUnicode_FromString(key);
-    if (kv == NULL)
+    if (kv == NULL) {
+        PyErr_Clear();
         return NULL;
+    }
     rv = PyDict_GetItem(v, kv);
     Py_DECREF(kv);
     return rv;
@@ -2715,6 +2782,15 @@ PyDict_SetItemString(PyObject *v, const char *key, PyObject *item)
 }
 
 int
+_PyDict_DelItemId(PyObject *v, _Py_Identifier *key)
+{
+    PyObject *kv = _PyUnicode_FromId(key); /* borrowed */
+    if (kv == NULL)
+        return -1;  /* no key object for this identifier: report failure */
+    return PyDict_DelItem(v, kv);  /* result of the delete is passed through */
+}
+
+int
 PyDict_DelItemString(PyObject *v, const char *key)
 {
     PyObject *kv;
@@ -2807,8 +2883,8 @@ static PyMethodDef dictiter_methods[] = {
 static PyObject *dictiter_iternextkey(dictiterobject *di)
 {
     PyObject *key;
-    register Py_ssize_t i, mask, offset;
-    register PyDictKeysObject *k;
+    Py_ssize_t i, mask, offset;
+    PyDictKeysObject *k;
     PyDictObject *d = di->di_dict;
     PyObject **value_ptr;
 
@@ -2890,7 +2966,7 @@ PyTypeObject PyDictIterKey_Type = {
 static PyObject *dictiter_iternextvalue(dictiterobject *di)
 {
     PyObject *value;
-    register Py_ssize_t i, mask, offset;
+    Py_ssize_t i, mask, offset;
     PyDictObject *d = di->di_dict;
     PyObject **value_ptr;
 
@@ -2971,7 +3047,7 @@ PyTypeObject PyDictIterValue_Type = {
 static PyObject *dictiter_iternextitem(dictiterobject *di)
 {
     PyObject *key, *value, *result = di->di_result;
-    register Py_ssize_t i, mask, offset;
+    Py_ssize_t i, mask, offset;
     PyDictObject *d = di->di_dict;
     PyObject **value_ptr;
 
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 6b04700..1a218c1 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -16,9 +16,6 @@ PyObject *PyExc_IOError = NULL;
 #ifdef MS_WINDOWS
 PyObject *PyExc_WindowsError = NULL;
 #endif
-#ifdef __VMS
-PyObject *PyExc_VMSError = NULL;
-#endif
 
 /* The dict map from errno codes to OSError subclasses */
 static PyObject *errnomap = NULL;
@@ -121,11 +118,11 @@ BaseException_str(PyBaseExceptionObject *self)
 static PyObject *
 BaseException_repr(PyBaseExceptionObject *self)
 {
-    char *name;
-    char *dot;
+    const char *name;
+    const char *dot;
 
-    name = (char *)Py_TYPE(self)->tp_name;
-    dot = strrchr(name, '.');
+    name = Py_TYPE(self)->tp_name;
+    dot = (const char *) strrchr(name, '.');
     if (dot != NULL) name = dot+1;
 
     return PyUnicode_FromFormat("%s%R", name, self->args);
@@ -727,13 +724,17 @@ ComplexExtendsException(PyExc_Exception, ImportError,
  * we hack args so that it only contains two items.  This also
  * means we need our own __str__() which prints out the filename
  * when it was supplied.
+ *
+ * (If a function has two filenames, such as rename(), symlink(),
+ * or copy(), PyErr_SetFromErrnoWithFilenameObjects() is called,
+ * which allows passing in a second filename.)
  */
 
 /* This function doesn't cleanup on error, the caller should */
 static int
 oserror_parse_args(PyObject **p_args,
                    PyObject **myerrno, PyObject **strerror,
-                   PyObject **filename
+                   PyObject **filename, PyObject **filename2
 #ifdef MS_WINDOWS
                    , PyObject **winerror
 #endif
@@ -741,14 +742,23 @@ oserror_parse_args(PyObject **p_args,
 {
     Py_ssize_t nargs;
     PyObject *args = *p_args;
+#ifndef MS_WINDOWS
+    /*
+     * ignored on non-Windows platforms,
+     * but parsed so OSError has a consistent signature
+     */
+    PyObject *_winerror = NULL;
+    PyObject **winerror = &_winerror;
+#endif /* MS_WINDOWS */
 
     nargs = PyTuple_GET_SIZE(args);
 
-#ifdef MS_WINDOWS
-    if (nargs >= 2 && nargs <= 4) {
-        if (!PyArg_UnpackTuple(args, "OSError", 2, 4,
-                               myerrno, strerror, filename, winerror))
+    if (nargs >= 2 && nargs <= 5) {
+        if (!PyArg_UnpackTuple(args, "OSError", 2, 5,
+                               myerrno, strerror,
+                               filename, winerror, filename2))
             return -1;
+#ifdef MS_WINDOWS
         if (*winerror && PyLong_Check(*winerror)) {
             long errcode, winerrcode;
             PyObject *newargs;
@@ -780,14 +790,8 @@ oserror_parse_args(PyObject **p_args,
             Py_DECREF(args);
             args = *p_args = newargs;
         }
+#endif /* MS_WINDOWS */
     }
-#else
-    if (nargs >= 2 && nargs <= 3) {
-        if (!PyArg_UnpackTuple(args, "OSError", 2, 3,
-                               myerrno, strerror, filename))
-            return -1;
-    }
-#endif
 
     return 0;
 }
@@ -795,7 +799,7 @@ oserror_parse_args(PyObject **p_args,
 static int
 oserror_init(PyOSErrorObject *self, PyObject **p_args,
              PyObject *myerrno, PyObject *strerror,
-             PyObject *filename
+             PyObject *filename, PyObject *filename2
 #ifdef MS_WINDOWS
              , PyObject *winerror
 #endif
@@ -819,9 +823,14 @@ oserror_init(PyOSErrorObject *self, PyObject **p_args,
             Py_INCREF(filename);
             self->filename = filename;
 
-            if (nargs >= 2 && nargs <= 3) {
-                /* filename is removed from the args tuple (for compatibility
-                   purposes, see test_exceptions.py) */
+            if (filename2 && filename2 != Py_None) {
+                Py_INCREF(filename2);
+                self->filename2 = filename2;
+            }
+
+            if (nargs >= 2 && nargs <= 5) {
+                /* filename, filename2, and winerror are removed from the args tuple
+                   (for compatibility purposes, see test_exceptions.py) */
                 PyObject *subslice = PyTuple_GetSlice(args, 0, 2);
                 if (!subslice)
                     return -1;
@@ -845,7 +854,7 @@ oserror_init(PyOSErrorObject *self, PyObject **p_args,
     /* Steals the reference to args */
     Py_CLEAR(self->args);
     self->args = args;
-    args = NULL;
+    *p_args = args = NULL;
 
     return 0;
 }
@@ -858,7 +867,7 @@ OSError_init(PyOSErrorObject *self, PyObject *args, PyObject *kwds);
 static int
 oserror_use_init(PyTypeObject *type)
 {
-    /* When __init__ is defined in a OSError subclass, we want any
+    /* When __init__ is defined in an OSError subclass, we want any
        extraneous argument to __new__ to be ignored.  The only reasonable
        solution, given __new__ takes a variable number of arguments,
        is to defer arg parsing and initialization to __init__.
@@ -880,17 +889,20 @@ static PyObject *
 OSError_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
     PyOSErrorObject *self = NULL;
-    PyObject *myerrno = NULL, *strerror = NULL, *filename = NULL;
+    PyObject *myerrno = NULL, *strerror = NULL;
+    PyObject *filename = NULL, *filename2 = NULL;
 #ifdef MS_WINDOWS
     PyObject *winerror = NULL;
 #endif
 
+    Py_INCREF(args);
+
     if (!oserror_use_init(type)) {
         if (!_PyArg_NoKeywords(type->tp_name, kwds))
-            return NULL;
+            goto error;
 
-        Py_INCREF(args);
-        if (oserror_parse_args(&args, &myerrno, &strerror, &filename
+        if (oserror_parse_args(&args, &myerrno, &strerror,
+                               &filename, &filename2
 #ifdef MS_WINDOWS
                                , &winerror
 #endif
@@ -919,7 +931,7 @@ OSError_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     self->written = -1;
 
     if (!oserror_use_init(type)) {
-        if (oserror_init(self, &args, myerrno, strerror, filename
+        if (oserror_init(self, &args, myerrno, strerror, filename, filename2
 #ifdef MS_WINDOWS
                          , winerror
 #endif
@@ -932,6 +944,7 @@ OSError_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
             goto error;
     }
 
+    Py_XDECREF(args);
     return (PyObject *) self;
 
 error:
@@ -943,7 +956,8 @@ error:
 static int
 OSError_init(PyOSErrorObject *self, PyObject *args, PyObject *kwds)
 {
-    PyObject *myerrno = NULL, *strerror = NULL, *filename = NULL;
+    PyObject *myerrno = NULL, *strerror = NULL;
+    PyObject *filename = NULL, *filename2 = NULL;
 #ifdef MS_WINDOWS
     PyObject *winerror = NULL;
 #endif
@@ -956,14 +970,14 @@ OSError_init(PyOSErrorObject *self, PyObject *args, PyObject *kwds)
         return -1;
 
     Py_INCREF(args);
-    if (oserror_parse_args(&args, &myerrno, &strerror, &filename
+    if (oserror_parse_args(&args, &myerrno, &strerror, &filename, &filename2
 #ifdef MS_WINDOWS
                            , &winerror
 #endif
         ))
         goto error;
 
-    if (oserror_init(self, &args, myerrno, strerror, filename
+    if (oserror_init(self, &args, myerrno, strerror, filename, filename2
 #ifdef MS_WINDOWS
                      , winerror
 #endif
@@ -983,6 +997,7 @@ OSError_clear(PyOSErrorObject *self)
     Py_CLEAR(self->myerrno);
     Py_CLEAR(self->strerror);
     Py_CLEAR(self->filename);
+    Py_CLEAR(self->filename2);
 #ifdef MS_WINDOWS
     Py_CLEAR(self->winerror);
 #endif
@@ -1004,6 +1019,7 @@ OSError_traverse(PyOSErrorObject *self, visitproc visit,
     Py_VISIT(self->myerrno);
     Py_VISIT(self->strerror);
     Py_VISIT(self->filename);
+    Py_VISIT(self->filename2);
 #ifdef MS_WINDOWS
     Py_VISIT(self->winerror);
 #endif
@@ -1013,23 +1029,42 @@ OSError_traverse(PyOSErrorObject *self, visitproc visit,
 static PyObject *
 OSError_str(PyOSErrorObject *self)
 {
+#define OR_NONE(x) ((x)?(x):Py_None)
 #ifdef MS_WINDOWS
     /* If available, winerror has the priority over myerrno */
-    if (self->winerror && self->filename)
-        return PyUnicode_FromFormat("[WinError %S] %S: %R",
-                                    self->winerror ? self->winerror: Py_None,
-                                    self->strerror ? self->strerror: Py_None,
-                                    self->filename);
+    if (self->winerror && self->filename) {
+        if (self->filename2) {
+            return PyUnicode_FromFormat("[WinError %S] %S: %R -> %R",
+                                        OR_NONE(self->winerror),
+                                        OR_NONE(self->strerror),
+                                        self->filename,
+                                        self->filename2);
+        } else {
+            return PyUnicode_FromFormat("[WinError %S] %S: %R",
+                                        OR_NONE(self->winerror),
+                                        OR_NONE(self->strerror),
+                                        self->filename);
+        }
+    }
     if (self->winerror && self->strerror)
         return PyUnicode_FromFormat("[WinError %S] %S",
                                     self->winerror ? self->winerror: Py_None,
                                     self->strerror ? self->strerror: Py_None);
 #endif
-    if (self->filename)
-        return PyUnicode_FromFormat("[Errno %S] %S: %R",
-                                    self->myerrno ? self->myerrno: Py_None,
-                                    self->strerror ? self->strerror: Py_None,
-                                    self->filename);
+    if (self->filename) {
+        if (self->filename2) {
+            return PyUnicode_FromFormat("[Errno %S] %S: %R -> %R",
+                                        OR_NONE(self->myerrno),
+                                        OR_NONE(self->strerror),
+                                        self->filename,
+                                        self->filename2);
+        } else {
+            return PyUnicode_FromFormat("[Errno %S] %S: %R",
+                                        OR_NONE(self->myerrno),
+                                        OR_NONE(self->strerror),
+                                        self->filename);
+        }
+    }
     if (self->myerrno && self->strerror)
         return PyUnicode_FromFormat("[Errno %S] %S",
                                     self->myerrno ? self->myerrno: Py_None,
@@ -1046,7 +1081,8 @@ OSError_reduce(PyOSErrorObject *self)
     /* self->args is only the first two real arguments if there was a
      * file name given to OSError. */
     if (PyTuple_GET_SIZE(args) == 2 && self->filename) {
-        args = PyTuple_New(3);
+        Py_ssize_t size = self->filename2 ? 5 : 3;
+        args = PyTuple_New(size);
         if (!args)
             return NULL;
 
@@ -1060,6 +1096,20 @@ OSError_reduce(PyOSErrorObject *self)
 
         Py_INCREF(self->filename);
         PyTuple_SET_ITEM(args, 2, self->filename);
+
+        if (self->filename2) {
+            /*
+             * This tuple is essentially used as OSError(*args).
+             * So, to recreate filename2, we need to pass in
+             * winerror as well.
+             */
+            Py_INCREF(Py_None);
+            PyTuple_SET_ITEM(args, 3, Py_None);
+
+            /* filename2 */
+            Py_INCREF(self->filename2);
+            PyTuple_SET_ITEM(args, 4, self->filename2);
+        }
     } else
         Py_INCREF(args);
 
@@ -1099,6 +1149,8 @@ static PyMemberDef OSError_members[] = {
         PyDoc_STR("exception strerror")},
     {"filename", T_OBJECT, offsetof(PyOSErrorObject, filename), 0,
         PyDoc_STR("exception filename")},
+    {"filename2", T_OBJECT, offsetof(PyOSErrorObject, filename2), 0,
+        PyDoc_STR("second exception filename")},
 #ifdef MS_WINDOWS
     {"winerror", T_OBJECT, offsetof(PyOSErrorObject, winerror), 0,
         PyDoc_STR("Win32 exception code")},
@@ -1202,6 +1254,9 @@ SimpleExtendsException(PyExc_Exception, AttributeError,
  *    SyntaxError extends Exception
  */
 
+/* Helper function to customise error message for some syntax errors */
+static int _report_missing_parentheses(PySyntaxErrorObject *self);
+
 static int
 SyntaxError_init(PySyntaxErrorObject *self, PyObject *args, PyObject *kwds)
 {
@@ -1246,6 +1301,13 @@ SyntaxError_init(PySyntaxErrorObject *self, PyObject *args, PyObject *kwds)
         Py_INCREF(self->text);
 
         Py_DECREF(info);
+
+        /* Issue #21669: Custom error for 'print' & 'exec' as statements */
+        if (self->text && PyUnicode_Check(self->text)) {
+            if (_report_missing_parentheses(self) < 0) {
+                return -1;
+            }
+        }
     }
     return 0;
 }
@@ -1785,6 +1847,10 @@ UnicodeEncodeError_str(PyObject *self)
     PyObject *reason_str = NULL;
     PyObject *encoding_str = NULL;
 
+    if (!uself->object)
+        /* Not properly initialized. */
+        return PyUnicode_FromString("");
+
     /* Get reason and encoding as strings, which they might not be if
        they've been modified after we were contructed. */
     reason_str = PyObject_Str(uself->reason);
@@ -1856,8 +1922,6 @@ static int
 UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
 {
     PyUnicodeErrorObject *ude;
-    const char *data;
-    Py_ssize_t size;
 
     if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
         return -1;
@@ -1878,21 +1942,27 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
              return -1;
     }
 
-    if (!PyBytes_Check(ude->object)) {
-        if (PyObject_AsReadBuffer(ude->object, (const void **)&data, &size)) {
-            ude->encoding = ude->object = ude->reason = NULL;
-            return -1;
-        }
-        ude->object = PyBytes_FromStringAndSize(data, size);
-    }
-    else {
-        Py_INCREF(ude->object);
-    }
-
     Py_INCREF(ude->encoding);
+    Py_INCREF(ude->object);
     Py_INCREF(ude->reason);
 
+    if (!PyBytes_Check(ude->object)) {
+        Py_buffer view;
+        if (PyObject_GetBuffer(ude->object, &view, PyBUF_SIMPLE) != 0)
+            goto error;
+        Py_CLEAR(ude->object);
+        ude->object = PyBytes_FromStringAndSize(view.buf, view.len);
+        PyBuffer_Release(&view);
+        if (!ude->object)
+            goto error;
+    }
     return 0;
+
+error:
+    Py_CLEAR(ude->encoding);
+    Py_CLEAR(ude->object);
+    Py_CLEAR(ude->reason);
+    return -1;
 }
 
 static PyObject *
@@ -1903,6 +1973,10 @@ UnicodeDecodeError_str(PyObject *self)
     PyObject *reason_str = NULL;
     PyObject *encoding_str = NULL;
 
+    if (!uself->object)
+        /* Not properly initialized. */
+        return PyUnicode_FromString("");
+
     /* Get reason and encoding as strings, which they might not be if
        they've been modified after we were contructed. */
     reason_str = PyObject_Str(uself->reason);
@@ -1997,6 +2071,10 @@ UnicodeTranslateError_str(PyObject *self)
     PyObject *result = NULL;
     PyObject *reason_str = NULL;
 
+    if (!uself->object)
+        /* Not properly initialized. */
+        return PyUnicode_FromString("");
+
     /* Get reason as a string, which it might not be if it's been
        modified after we were contructed. */
     reason_str = PyObject_Str(uself->reason);
@@ -2060,7 +2138,7 @@ _PyUnicodeTranslateError_Create(
     PyObject *object,
     Py_ssize_t start, Py_ssize_t end, const char *reason)
 {
-    return PyObject_CallFunction(PyExc_UnicodeTranslateError, "Ons",
+    return PyObject_CallFunction(PyExc_UnicodeTranslateError, "Onns",
                                  object, start, end, reason);
 }
 
@@ -2327,7 +2405,7 @@ PyObject *PyExc_RecursionErrorInst = NULL;
     }
 
 #ifdef MS_WINDOWS
-#include <Winsock2.h>
+#include <winsock2.h>
 /* The following constants were added to errno.h in VS2010 but have
    preferred WSA equivalents. */
 #undef EADDRINUSE
@@ -2471,9 +2549,6 @@ _PyExc_Init(PyObject *bltinmod)
 #ifdef MS_WINDOWS
     INIT_ALIAS(WindowsError, OSError)
 #endif
-#ifdef __VMS
-    INIT_ALIAS(VMSError, OSError)
-#endif
     POST_INIT(EOFError)
     POST_INIT(RuntimeError)
     POST_INIT(NotImplementedError)
@@ -2591,3 +2666,286 @@ _PyExc_Fini(void)
     free_preallocated_memerrors();
     Py_CLEAR(errnomap);
 }
+
+/* Helper to do the equivalent of "raise X from Y" in C, but always using
+ * the current exception rather than passing one in.
+ *
+ * We currently limit this to *only* exceptions that use the BaseException
+ * tp_init and tp_new methods, since we can be reasonably sure we can wrap
+ * those correctly without losing data and without losing backwards
+ * compatibility.
+ *
+ * We also aim to rule out *all* exceptions that might be storing additional
+ * state, whether by having a size difference relative to BaseException,
+ * additional arguments passed in during construction or by having a
+ * non-empty instance dict.
+ *
+ * We need to be very careful with what we wrap, since changing types to
+ * a broader exception type would be backwards incompatible for
+ * existing codecs, and with different init or new method implementations
+ * may either not support instantiation with PyErr_Format or lose
+ * information when instantiated that way.
+ *
+ * XXX (ncoghlan): This could be made more comprehensive by exploiting the
+ * fact that exceptions are expected to support pickling. If more builtin
+ * exceptions (e.g. AttributeError) start to be converted to rich
+ * exceptions with additional attributes, that's probably a better approach
+ * to pursue over adding special cases for particular stateful subclasses.
+ *
+ * Returns a borrowed reference to the new exception (if any), NULL if the
+ * existing exception was left in place.
+ */
+PyObject *
+_PyErr_TrySetFromCause(const char *format, ...)
+{
+    PyObject* msg_prefix;
+    PyObject *exc, *val, *tb;
+    PyTypeObject *caught_type;
+    PyObject **dictptr;
+    PyObject *instance_args;
+    Py_ssize_t num_args, caught_type_size, base_exc_size;
+    PyObject *new_exc, *new_val, *new_tb;
+    va_list vargs;
+    int same_basic_size;
+
+    PyErr_Fetch(&exc, &val, &tb);
+    caught_type = (PyTypeObject *)exc;
+    /* Ensure type info indicates no extra state is stored at the C level
+     * and that the type can be reinstantiated using PyErr_Format
+     */
+    caught_type_size = caught_type->tp_basicsize;
+    base_exc_size = _PyExc_BaseException.tp_basicsize;
+    same_basic_size = (
+        caught_type_size == base_exc_size ||
+        (PyType_SUPPORTS_WEAKREFS(caught_type) &&
+            (caught_type_size == base_exc_size + sizeof(PyObject *))
+        )
+    );
+    if (caught_type->tp_init != (initproc)BaseException_init ||
+        caught_type->tp_new != BaseException_new ||
+        !same_basic_size ||
+        caught_type->tp_itemsize != _PyExc_BaseException.tp_itemsize) {
+        /* We can't be sure we can wrap this safely, since it may contain
+         * more state than just the exception type. Accordingly, we just
+         * leave it alone.
+         */
+        PyErr_Restore(exc, val, tb);
+        return NULL;
+    }
+
+    /* Check the args are empty or contain a single string */
+    PyErr_NormalizeException(&exc, &val, &tb);
+    instance_args = ((PyBaseExceptionObject *)val)->args;
+    num_args = PyTuple_GET_SIZE(instance_args);
+    if (num_args > 1 ||
+        (num_args == 1 &&
+         !PyUnicode_CheckExact(PyTuple_GET_ITEM(instance_args, 0)))) {
+        /* More than 1 arg, or the one arg we do have isn't a string
+         */
+        PyErr_Restore(exc, val, tb);
+        return NULL;
+    }
+
+    /* Ensure the instance dict is also empty */
+    dictptr = _PyObject_GetDictPtr(val);
+    if (dictptr != NULL && *dictptr != NULL &&
+        PyObject_Length(*dictptr) > 0) {
+        /* While we could potentially copy a non-empty instance dictionary
+         * to the replacement exception, for now we take the more
+         * conservative path of leaving exceptions with attributes set
+         * alone.
+         */
+        PyErr_Restore(exc, val, tb);
+        return NULL;
+    }
+
+    /* For exceptions that we can wrap safely, we chain the original
+     * exception to a new one of the exact same type with an
+     * error message that mentions the additional details and the
+     * original exception.
+     *
+     * It would be nice to wrap OSError and various other exception
+     * types as well, but that's quite a bit trickier due to the extra
+     * state potentially stored on OSError instances.
+     */
+    /* Ensure the traceback is set correctly on the existing exception */
+    if (tb != NULL) {
+        PyException_SetTraceback(val, tb);
+        Py_DECREF(tb);
+    }
+
+#ifdef HAVE_STDARG_PROTOTYPES
+    va_start(vargs, format);
+#else
+    va_start(vargs);
+#endif
+    msg_prefix = PyUnicode_FromFormatV(format, vargs);
+    va_end(vargs);
+    if (msg_prefix == NULL) {
+        Py_DECREF(exc);
+        Py_DECREF(val);
+        return NULL;
+    }
+
+    PyErr_Format(exc, "%U (%s: %S)",
+                 msg_prefix, Py_TYPE(val)->tp_name, val);
+    Py_DECREF(exc);
+    Py_DECREF(msg_prefix);
+    PyErr_Fetch(&new_exc, &new_val, &new_tb);
+    PyErr_NormalizeException(&new_exc, &new_val, &new_tb);
+    PyException_SetCause(new_val, val);
+    PyErr_Restore(new_exc, new_val, new_tb);
+    return new_val;
+}
+
+
+/* To help with migration from Python 2, SyntaxError.__init__ applies some
+ * heuristics to try to report a more meaningful exception when print and
+ * exec are used like statements.
+ *
+ * The heuristics are currently expected to detect the following cases:
+ *   - top level statement
+ *   - statement in a nested suite
+ *   - trailing section of a one line complex statement
+ *
+ * They're currently known not to trigger:
+ *   - after a semi-colon
+ *
+ * The error message can be a bit odd in cases where the "arguments" are
+ * completely illegal syntactically, but that isn't worth the hassle of
+ * fixing.
+ *
+ * We also can't do anything about cases that are legal Python 3 syntax
+ * but mean something entirely different from what they did in Python 2
+ * (omitting the arguments entirely, printing items preceded by a unary plus
+ * or minus, using the stream redirection syntax).
+ */
+
+/* Replace self->msg with a new str built from text.
+ * Returns 1 on success; on failure returns -1 with an exception set and
+ * leaves the previous message untouched.
+ */
+static int
+_set_legacy_statement_msg(PySyntaxErrorObject *self, const char *text)
+{
+    PyObject *old_msg = self->msg;
+    PyObject *new_msg = PyUnicode_FromString(text);
+
+    if (new_msg == NULL) {
+        return -1;
+    }
+    /* Install the new value before releasing the old one so that
+     * self->msg never holds a dangling or half-updated reference. */
+    self->msg = new_msg;
+    Py_XDECREF(old_msg);
+    return 1;
+}
+
+static int
+_check_for_legacy_statements(PySyntaxErrorObject *self, Py_ssize_t start)
+{
+    /* Return values:
+     *   -1: an error occurred
+     *    0: nothing happened
+     *    1: the check triggered & the error message was changed
+     */
+    static PyObject *print_prefix = NULL;
+    static PyObject *exec_prefix = NULL;
+    Py_ssize_t text_len = PyUnicode_GET_LENGTH(self->text);
+    int kind = PyUnicode_KIND(self->text);
+    void *data = PyUnicode_DATA(self->text);
+    Py_ssize_t match;
+
+    /* Ignore leading whitespace */
+    while (start < text_len) {
+        Py_UCS4 ch = PyUnicode_READ(kind, data, start);
+        if (!Py_UNICODE_ISSPACE(ch))
+            break;
+        start++;
+    }
+    /* Checking against an empty or whitespace-only part of the string */
+    if (start == text_len) {
+        return 0;
+    }
+
+    /* Check for legacy print statements */
+    if (print_prefix == NULL) {
+        print_prefix = PyUnicode_InternFromString("print ");
+        if (print_prefix == NULL) {
+            return -1;
+        }
+    }
+    /* PyUnicode_Tailmatch() returns -1 on error, which must not be
+     * mistaken for a successful match. */
+    match = PyUnicode_Tailmatch(self->text, print_prefix,
+                                start, text_len, -1);
+    if (match < 0) {
+        return -1;
+    }
+    if (match) {
+        return _set_legacy_statement_msg(
+                   self, "Missing parentheses in call to 'print'");
+    }
+
+    /* Check for legacy exec statements */
+    if (exec_prefix == NULL) {
+        exec_prefix = PyUnicode_InternFromString("exec ");
+        if (exec_prefix == NULL) {
+            return -1;
+        }
+    }
+    match = PyUnicode_Tailmatch(self->text, exec_prefix,
+                                start, text_len, -1);
+    if (match < 0) {
+        return -1;
+    }
+    if (match) {
+        return _set_legacy_statement_msg(
+                   self, "Missing parentheses in call to 'exec'");
+    }
+    /* Fall back to the default error message */
+    return 0;
+}
+
+static int
+_report_missing_parentheses(PySyntaxErrorObject *self)
+{
+    Py_UCS4 left_paren = 40;
+    Py_ssize_t left_paren_index;
+    Py_ssize_t text_len = PyUnicode_GET_LENGTH(self->text);
+    int legacy_check_result = 0;
+
+    /* Skip entirely if there is an opening parenthesis */
+    left_paren_index = PyUnicode_FindChar(self->text, left_paren,
+                                          0, text_len, 1);
+    if (left_paren_index < -1) {
+        return -1;
+    }
+    if (left_paren_index != -1) {
+        /* Use default error message for any line with an opening paren */
+        return 0;
+    }
+    /* Handle the simple statement case */
+    legacy_check_result = _check_for_legacy_statements(self, 0);
+    if (legacy_check_result < 0) {
+        return -1;
+
+    }
+    if (legacy_check_result == 0) {
+        /* Handle the one-line complex statement case */
+        Py_UCS4 colon = 58;
+        Py_ssize_t colon_index;
+        colon_index = PyUnicode_FindChar(self->text, colon,
+                                         0, text_len, 1);
+        if (colon_index < -1) {
+            return -1;
+        }
+        if (colon_index >= 0 && colon_index < text_len) {
+            /* Check again, starting from just after the colon */
+            if (_check_for_legacy_statements(self, colon_index+1) < 0) {
+                return -1;
+            }
+        }
+    }
+    return 0;
+}
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
index 98f42a9..1a93a6d 100644
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -26,8 +26,8 @@ extern "C" {
 /* External C interface */
 
 PyObject *
-PyFile_FromFd(int fd, char *name, char *mode, int buffering, char *encoding,
-              char *errors, char *newline, int closefd)
+PyFile_FromFd(int fd, const char *name, const char *mode, int buffering, const char *encoding,
+              const char *errors, const char *newline, int closefd)
 {
     PyObject *io, *stream;
     _Py_IDENTIFIER(open);
@@ -372,8 +372,11 @@ PyFile_NewStdPrinter(int fd)
 static PyObject *
 stdprinter_write(PyStdPrinter_Object *self, PyObject *args)
 {
-    char *c;
+    PyObject *unicode;
+    PyObject *bytes = NULL;
+    char *str;
     Py_ssize_t n;
+    int err;
 
     if (self->fd < 0) {
         /* fd might be invalid on Windows
@@ -383,25 +386,41 @@ stdprinter_write(PyStdPrinter_Object *self, PyObject *args)
         Py_RETURN_NONE;
     }
 
-    if (!PyArg_ParseTuple(args, "s", &c)) {
+    if (!PyArg_ParseTuple(args, "U", &unicode))
         return NULL;
+
+    /* encode Unicode to UTF-8 */
+    str = PyUnicode_AsUTF8AndSize(unicode, &n);
+    if (str == NULL) {
+        PyErr_Clear();
+        bytes = _PyUnicode_AsUTF8String(unicode, "backslashreplace");
+        if (bytes == NULL)
+            return NULL;
+        if (PyBytes_AsStringAndSize(bytes, &str, &n) < 0) {
+            Py_DECREF(bytes);
+            return NULL;
+        }
     }
-    n = strlen(c);
 
     Py_BEGIN_ALLOW_THREADS
     errno = 0;
-#if defined(MS_WIN64) || defined(MS_WINDOWS)
+#ifdef MS_WINDOWS
     if (n > INT_MAX)
         n = INT_MAX;
-    n = write(self->fd, c, (int)n);
+    n = write(self->fd, str, (int)n);
 #else
-    n = write(self->fd, c, n);
+    n = write(self->fd, str, n);
 #endif
+    /* save errno, it can be modified indirectly by Py_XDECREF() */
+    err = errno;
     Py_END_ALLOW_THREADS
 
+    Py_XDECREF(bytes);
+
     if (n < 0) {
-        if (errno == EAGAIN)
+        if (err == EAGAIN)
             Py_RETURN_NONE;
+        errno = err;
         PyErr_SetFromErrno(PyExc_IOError);
         return NULL;
     }
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
index a08a852..acd88d6 100644
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -9,11 +9,6 @@
 #include <ctype.h>
 #include <float.h>
 
-#undef MAX
-#undef MIN
-#define MAX(x, y) ((x) < (y) ? (y) : (x))
-#define MIN(x, y) ((x) < (y) ? (x) : (y))
-
 
 /* Special free list
    free_list is a singly-linked list of available PyFloatObjects, linked
@@ -114,7 +109,7 @@ PyFloat_GetInfo(void)
 PyObject *
 PyFloat_FromDouble(double fval)
 {
-    register PyFloatObject *op = free_list;
+    PyFloatObject *op = free_list;
     if (op != NULL) {
         free_list = (PyFloatObject *) Py_TYPE(op);
         numfree--;
@@ -124,7 +119,7 @@ PyFloat_FromDouble(double fval)
             return PyErr_NoMemory();
     }
     /* Inline PyObject_New */
-    PyObject_INIT(op, &PyFloat_Type);
+    (void)PyObject_INIT(op, &PyFloat_Type);
     op->ob_fval = fval;
     return (PyObject *) op;
 }
@@ -136,6 +131,7 @@ PyFloat_FromString(PyObject *v)
     double x;
     PyObject *s_buffer = NULL;
     Py_ssize_t len;
+    Py_buffer view = {NULL, NULL};
     PyObject *result = NULL;
 
     if (PyUnicode_Check(v)) {
@@ -148,9 +144,29 @@ PyFloat_FromString(PyObject *v)
             return NULL;
         }
     }
-    else if (PyObject_AsCharBuffer(v, &s, &len)) {
-        PyErr_SetString(PyExc_TypeError,
-            "float() argument must be a string or a number");
+    else if (PyBytes_Check(v)) {
+        s = PyBytes_AS_STRING(v);
+        len = PyBytes_GET_SIZE(v);
+    }
+    else if (PyByteArray_Check(v)) {
+        s = PyByteArray_AS_STRING(v);
+        len = PyByteArray_GET_SIZE(v);
+    }
+    else if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) == 0) {
+        s = (const char *)view.buf;
+        len = view.len;
+        /* Copy to NUL-terminated buffer. */
+        s_buffer = PyBytes_FromStringAndSize(s, len);
+        if (s_buffer == NULL) {
+            PyBuffer_Release(&view);
+            return NULL;
+        }
+        s = PyBytes_AS_STRING(s_buffer);
+    }
+    else {
+        PyErr_Format(PyExc_TypeError,
+            "float() argument must be a string or a number, not '%.200s'",
+            Py_TYPE(v)->tp_name);
         return NULL;
     }
     last = s + len;
@@ -174,6 +190,7 @@ PyFloat_FromString(PyObject *v)
     else
         result = PyFloat_FromDouble(x);
 
+    PyBuffer_Release(&view);
     Py_XDECREF(s_buffer);
     return result;
 }
@@ -218,6 +235,7 @@ PyFloat_AsDouble(PyObject *op)
     if (fo == NULL)
         return -1;
     if (!PyFloat_Check(fo)) {
+        Py_DECREF(fo);
         PyErr_SetString(PyExc_TypeError,
                         "nb_float should return float object");
         return -1;
@@ -246,7 +264,7 @@ PyFloat_AsDouble(PyObject *op)
 static int
 convert_to_double(PyObject **v, double *dbl)
 {
-    register PyObject *obj = *v;
+    PyObject *obj = *v;
 
     if (PyLong_Check(obj)) {
         *dbl = PyLong_AsDouble(obj);
@@ -1131,7 +1149,7 @@ float_hex(PyObject *v)
     }
 
     m = frexp(fabs(x), &e);
-    shift = 1 - MAX(DBL_MIN_EXP - e, 0);
+    shift = 1 - Py_MAX(DBL_MIN_EXP - e, 0);
     m = ldexp(m, shift);
     e -= shift;
 
@@ -1285,8 +1303,8 @@ float_fromhex(PyObject *cls, PyObject *arg)
     fdigits = coeff_end - s_store;
     if (ndigits == 0)
         goto parse_error;
-    if (ndigits > MIN(DBL_MIN_EXP - DBL_MANT_DIG - LONG_MIN/2,
-                      LONG_MAX/2 + 1 - DBL_MAX_EXP)/4)
+    if (ndigits > Py_MIN(DBL_MIN_EXP - DBL_MANT_DIG - LONG_MIN/2,
+                         LONG_MAX/2 + 1 - DBL_MAX_EXP)/4)
         goto insane_length_error;
 
     /* [p <exponent>] */
@@ -1342,7 +1360,7 @@ float_fromhex(PyObject *cls, PyObject *arg)
 
     /* lsb = exponent of least significant bit of the *rounded* value.
        This is top_exp - DBL_MANT_DIG unless result is subnormal. */
-    lsb = MAX(top_exp, (long)DBL_MIN_EXP) - DBL_MANT_DIG;
+    lsb = Py_MAX(top_exp, (long)DBL_MIN_EXP) - DBL_MANT_DIG;
 
     x = 0.0;
     if (exp >= lsb) {
@@ -1421,7 +1439,7 @@ Create a floating-point number from a hexadecimal string.\n\
 >>> float.fromhex('0x1.ffffp10')\n\
 2047.984375\n\
 >>> float.fromhex('-0x1p-1074')\n\
--4.9406564584124654e-324");
+-5e-324");
 
 
 static PyObject *
@@ -1549,7 +1567,7 @@ float_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     tmp = float_new(&PyFloat_Type, args, kwds);
     if (tmp == NULL)
         return NULL;
-    assert(PyFloat_CheckExact(tmp));
+    assert(PyFloat_Check(tmp));
     newobj = type->tp_alloc(type, 0);
     if (newobj == NULL) {
         Py_DECREF(tmp);
@@ -1711,7 +1729,7 @@ float__format__(PyObject *self, PyObject *args)
     if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
         return NULL;
 
-    _PyUnicodeWriter_Init(&writer, 0);
+    _PyUnicodeWriter_Init(&writer);
     ret = _PyFloat_FormatAdvancedWriter(
         &writer,
         self,
@@ -1858,7 +1876,7 @@ PyTypeObject PyFloat_Type = {
     float_new,                                  /* tp_new */
 };
 
-void
+int
 _PyFloat_Init(void)
 {
     /* We attempt to determine if this machine is using IEEE
@@ -1908,8 +1926,11 @@ _PyFloat_Init(void)
     float_format = detected_float_format;
 
     /* Init float info */
-    if (FloatInfoType.tp_name == 0)
-        PyStructSequence_InitType(&FloatInfoType, &floatinfo_desc);
+    if (FloatInfoType.tp_name == NULL) {
+        if (PyStructSequence_InitType2(&FloatInfoType, &floatinfo_desc) < 0)
+            return 0;
+    }
+    return 1;
 }
 
 int
diff --git a/Objects/frameobject.c b/Objects/frameobject.c
index b312130..55ee563 100644
--- a/Objects/frameobject.c
+++ b/Objects/frameobject.c
@@ -7,11 +7,6 @@
 #include "opcode.h"
 #include "structmember.h"
 
-#undef MIN
-#undef MAX
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-
 #define OFF(x) offsetof(PyFrameObject, x)
 
 static PyMemberDef frame_memberlist[] = {
@@ -26,7 +21,8 @@ static PyMemberDef frame_memberlist[] = {
 static PyObject *
 frame_getlocals(PyFrameObject *f, void *closure)
 {
-    PyFrame_FastToLocals(f);
+    if (PyFrame_FastToLocalsWithError(f) < 0)
+        return NULL;
     Py_INCREF(f->f_locals);
     return f->f_locals;
 }
@@ -160,8 +156,8 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno)
 
     /* We're now ready to look at the bytecode. */
     PyBytes_AsStringAndSize(f->f_code->co_code, (char **)&code, &code_len);
-    min_addr = MIN(new_lasti, f->f_lasti);
-    max_addr = MAX(new_lasti, f->f_lasti);
+    min_addr = Py_MIN(new_lasti, f->f_lasti);
+    max_addr = Py_MAX(new_lasti, f->f_lasti);
 
     /* You can't jump onto a line with an 'except' statement on it -
      * they expect to have an exception on the top of the stack, which
@@ -293,7 +289,7 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno)
             break;
         }
 
-        min_delta_iblock = MIN(min_delta_iblock, delta_iblock);
+        min_delta_iblock = Py_MIN(min_delta_iblock, delta_iblock);
 
         if (op >= HAVE_ARGUMENT) {
             addr += 2;
@@ -466,7 +462,7 @@ static int
 frame_traverse(PyFrameObject *f, visitproc visit, void *arg)
 {
     PyObject **fastlocals, **p;
-    int i, slots;
+    Py_ssize_t i, slots;
 
     Py_VISIT(f->f_back);
     Py_VISIT(f->f_code);
@@ -493,10 +489,10 @@ frame_traverse(PyFrameObject *f, visitproc visit, void *arg)
 }
 
 static void
-frame_clear(PyFrameObject *f)
+frame_tp_clear(PyFrameObject *f)
 {
     PyObject **fastlocals, **p, **oldtop;
-    int i, slots;
+    Py_ssize_t i, slots;
 
     /* Before anything else, make sure that this frame is clearly marked
      * as being defunct!  Else, e.g., a generator reachable from this
@@ -505,6 +501,7 @@ frame_clear(PyFrameObject *f)
      */
     oldtop = f->f_stacktop;
     f->f_stacktop = NULL;
+    f->f_executing = 0;
 
     Py_CLEAR(f->f_exc_type);
     Py_CLEAR(f->f_exc_value);
@@ -525,6 +522,25 @@ frame_clear(PyFrameObject *f)
 }
 
 static PyObject *
+frame_clear(PyFrameObject *f)
+{
+    if (f->f_executing) {
+        PyErr_SetString(PyExc_RuntimeError,
+                        "cannot clear an executing frame");
+        return NULL;
+    }
+    if (f->f_gen) {
+        _PyGen_Finalize(f->f_gen);
+        assert(f->f_gen == NULL);
+    }
+    frame_tp_clear(f);
+    Py_RETURN_NONE;
+}
+
+PyDoc_STRVAR(clear__doc__,
+"F.clear(): clear most references held by the frame");
+
+static PyObject *
 frame_sizeof(PyFrameObject *f)
 {
     Py_ssize_t res, extras, ncells, nfrees;
@@ -543,6 +559,8 @@ PyDoc_STRVAR(sizeof__doc__,
 "F.__sizeof__() -> size of F in memory, in bytes");
 
 static PyMethodDef frame_methods[] = {
+    {"clear",           (PyCFunction)frame_clear,       METH_NOARGS,
+     clear__doc__},
     {"__sizeof__",      (PyCFunction)frame_sizeof,      METH_NOARGS,
      sizeof__doc__},
     {NULL,              NULL}   /* sentinel */
@@ -571,7 +589,7 @@ PyTypeObject PyFrame_Type = {
     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
     0,                                          /* tp_doc */
     (traverseproc)frame_traverse,               /* tp_traverse */
-    (inquiry)frame_clear,                       /* tp_clear */
+    (inquiry)frame_tp_clear,                    /* tp_clear */
     0,                                          /* tp_richcompare */
     0,                                          /* tp_weaklistoffset */
     0,                                          /* tp_iter */
@@ -583,13 +601,13 @@ PyTypeObject PyFrame_Type = {
     0,                                          /* tp_dict */
 };
 
-static PyObject *builtin_object;
+_Py_IDENTIFIER(__builtins__);
 
 int _PyFrame_Init()
 {
-    builtin_object = PyUnicode_InternFromString("__builtins__");
-    if (builtin_object == NULL)
-        return 0;
+    /* Before, PyId___builtins__ was a string created explicitly in
+       this function. Now there is nothing to initialize anymore, but
+       the function is kept for backward compatibility. */
     return 1;
 }
 
@@ -610,7 +628,7 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals,
     }
 #endif
     if (back == NULL || back->f_globals != globals) {
-        builtins = PyDict_GetItem(globals, builtin_object);
+        builtins = _PyDict_GetItemId(globals, &PyId___builtins__);
         if (builtins) {
             if (PyModule_Check(builtins)) {
                 builtins = PyModule_GetDict(builtins);
@@ -708,11 +726,12 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals,
         Py_INCREF(locals);
         f->f_locals = locals;
     }
-    f->f_tstate = tstate;
 
     f->f_lasti = -1;
     f->f_lineno = code->co_firstlineno;
     f->f_iblock = 0;
+    f->f_executing = 0;
+    f->f_gen = NULL;
 
     _PyObject_GC_TRACK(f);
     return f;
@@ -753,12 +772,9 @@ PyFrame_BlockPop(PyFrameObject *f)
    If deref is true, then the values being copied are cell variables
    and the value is extracted from the cell variable before being put
    in dict.
-
-   Exceptions raised while modifying the dict are silently ignored,
-   because there is no good way to report them.
  */
 
-static void
+static int
 map_to_dict(PyObject *map, Py_ssize_t nmap, PyObject *dict, PyObject **values,
             int deref)
 {
@@ -770,19 +786,24 @@ map_to_dict(PyObject *map, Py_ssize_t nmap, PyObject *dict, PyObject **values,
         PyObject *key = PyTuple_GET_ITEM(map, j);
         PyObject *value = values[j];
         assert(PyUnicode_Check(key));
-        if (deref) {
+        if (deref && value != NULL) {
             assert(PyCell_Check(value));
             value = PyCell_GET(value);
         }
         if (value == NULL) {
-            if (PyObject_DelItem(dict, key) != 0)
-                PyErr_Clear();
+            if (PyObject_DelItem(dict, key) != 0) {
+                if (PyErr_ExceptionMatches(PyExc_KeyError))
+                    PyErr_Clear();
+                else
+                    return -1;
+            }
         }
         else {
             if (PyObject_SetItem(dict, key, value) != 0)
-                PyErr_Clear();
+                return -1;
         }
     }
+    return 0;
 }
 
 /* Copy values from the "locals" dict into the fast locals.
@@ -839,42 +860,49 @@ dict_to_map(PyObject *map, Py_ssize_t nmap, PyObject *dict, PyObject **values,
     }
 }
 
-void
-PyFrame_FastToLocals(PyFrameObject *f)
+int
+PyFrame_FastToLocalsWithError(PyFrameObject *f)
 {
     /* Merge fast locals into f->f_locals */
     PyObject *locals, *map;
     PyObject **fast;
-    PyObject *error_type, *error_value, *error_traceback;
     PyCodeObject *co;
     Py_ssize_t j;
-    int ncells, nfreevars;
-    if (f == NULL)
-        return;
+    Py_ssize_t ncells, nfreevars;
+
+    if (f == NULL) {
+        PyErr_BadInternalCall();
+        return -1;
+    }
     locals = f->f_locals;
     if (locals == NULL) {
         locals = f->f_locals = PyDict_New();
-        if (locals == NULL) {
-            PyErr_Clear(); /* Can't report it :-( */
-            return;
-        }
+        if (locals == NULL)
+            return -1;
     }
     co = f->f_code;
     map = co->co_varnames;
-    if (!PyTuple_Check(map))
-        return;
-    PyErr_Fetch(&error_type, &error_value, &error_traceback);
+    if (!PyTuple_Check(map)) {
+        PyErr_Format(PyExc_SystemError,
+                     "co_varnames must be a tuple, not %s",
+                     Py_TYPE(map)->tp_name);
+        return -1;
+    }
     fast = f->f_localsplus;
     j = PyTuple_GET_SIZE(map);
     if (j > co->co_nlocals)
         j = co->co_nlocals;
-    if (co->co_nlocals)
-        map_to_dict(map, j, locals, fast, 0);
+    if (co->co_nlocals) {
+        if (map_to_dict(map, j, locals, fast, 0) < 0)
+            return -1;
+    }
     ncells = PyTuple_GET_SIZE(co->co_cellvars);
     nfreevars = PyTuple_GET_SIZE(co->co_freevars);
     if (ncells || nfreevars) {
-        map_to_dict(co->co_cellvars, ncells,
-                    locals, fast + co->co_nlocals, 1);
+        if (map_to_dict(co->co_cellvars, ncells,
+                        locals, fast + co->co_nlocals, 1))
+            return -1;
+
         /* If the namespace is unoptimized, then one of the
            following cases applies:
            1. It does not contain free variables, because it
@@ -884,11 +912,24 @@ PyFrame_FastToLocals(PyFrameObject *f)
            into the locals dict used by the class.
         */
         if (co->co_flags & CO_OPTIMIZED) {
-            map_to_dict(co->co_freevars, nfreevars,
-                        locals, fast + co->co_nlocals + ncells, 1);
+            if (map_to_dict(co->co_freevars, nfreevars,
+                            locals, fast + co->co_nlocals + ncells, 1) < 0)
+                return -1;
         }
     }
-    PyErr_Restore(error_type, error_value, error_traceback);
+    return 0;
+}
+
+void
+PyFrame_FastToLocals(PyFrameObject *f)
+{
+    int res;
+
+    assert(!PyErr_Occurred());
+
+    res = PyFrame_FastToLocalsWithError(f);
+    if (res < 0)
+        PyErr_Clear();
 }
 
 void
@@ -900,7 +941,7 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear)
     PyObject *error_type, *error_value, *error_traceback;
     PyCodeObject *co;
     Py_ssize_t j;
-    int ncells, nfreevars;
+    Py_ssize_t ncells, nfreevars;
     if (f == NULL)
         return;
     locals = f->f_locals;
@@ -952,7 +993,6 @@ void
 PyFrame_Fini(void)
 {
     (void)PyFrame_ClearFreeList();
-    Py_CLEAR(builtin_object);
 }
 
 /* Print summary info about the state of the optimized allocator */
diff --git a/Objects/funcobject.c b/Objects/funcobject.c
index 49415b9..b043934 100644
--- a/Objects/funcobject.c
+++ b/Objects/funcobject.c
@@ -8,60 +8,59 @@
 PyObject *
 PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname)
 {
-    PyFunctionObject *op = PyObject_GC_New(PyFunctionObject,
-                                        &PyFunction_Type);
-    static PyObject *__name__ = 0;
-    if (op != NULL) {
-        PyObject *doc;
-        PyObject *consts;
-        PyObject *module;
-        op->func_weakreflist = NULL;
-        Py_INCREF(code);
-        op->func_code = code;
-        Py_INCREF(globals);
-        op->func_globals = globals;
-        op->func_name = ((PyCodeObject *)code)->co_name;
-        Py_INCREF(op->func_name);
-        op->func_defaults = NULL; /* No default arguments */
-        op->func_kwdefaults = NULL; /* No keyword only defaults */
-        op->func_closure = NULL;
-        consts = ((PyCodeObject *)code)->co_consts;
-        if (PyTuple_Size(consts) >= 1) {
-            doc = PyTuple_GetItem(consts, 0);
-            if (!PyUnicode_Check(doc))
-                doc = Py_None;
-        }
-        else
+    PyFunctionObject *op;
+    PyObject *doc, *consts, *module;
+    static PyObject *__name__ = NULL;
+
+    if (__name__ == NULL) {
+        __name__ = PyUnicode_InternFromString("__name__");
+        if (__name__ == NULL)
+            return NULL;
+    }
+
+    op = PyObject_GC_New(PyFunctionObject, &PyFunction_Type);
+    if (op == NULL)
+        return NULL;
+
+    op->func_weakreflist = NULL;
+    Py_INCREF(code);
+    op->func_code = code;
+    Py_INCREF(globals);
+    op->func_globals = globals;
+    op->func_name = ((PyCodeObject *)code)->co_name;
+    Py_INCREF(op->func_name);
+    op->func_defaults = NULL; /* No default arguments */
+    op->func_kwdefaults = NULL; /* No keyword only defaults */
+    op->func_closure = NULL;
+
+    consts = ((PyCodeObject *)code)->co_consts;
+    if (PyTuple_Size(consts) >= 1) {
+        doc = PyTuple_GetItem(consts, 0);
+        if (!PyUnicode_Check(doc))
             doc = Py_None;
-        Py_INCREF(doc);
-        op->func_doc = doc;
-        op->func_dict = NULL;
-        op->func_module = NULL;
-        op->func_annotations = NULL;
-
-        /* __module__: If module name is in globals, use it.
-           Otherwise, use None.
-        */
-        if (!__name__) {
-            __name__ = PyUnicode_InternFromString("__name__");
-            if (!__name__) {
-                Py_DECREF(op);
-                return NULL;
-            }
-        }
-        module = PyDict_GetItem(globals, __name__);
-        if (module) {
-            Py_INCREF(module);
-            op->func_module = module;
-        }
-        if (qualname)
-            op->func_qualname = qualname;
-        else
-            op->func_qualname = op->func_name;
-        Py_INCREF(op->func_qualname);
     }
     else
-        return NULL;
+        doc = Py_None;
+    Py_INCREF(doc);
+    op->func_doc = doc;
+
+    op->func_dict = NULL;
+    op->func_module = NULL;
+    op->func_annotations = NULL;
+
+    /* __module__: If module name is in globals, use it.
+       Otherwise, use None. */
+    module = PyDict_GetItem(globals, __name__);
+    if (module) {
+        Py_INCREF(module);
+        op->func_module = module;
+    }
+    if (qualname)
+        op->func_qualname = qualname;
+    else
+        op->func_qualname = op->func_name;
+    Py_INCREF(op->func_qualname);
+
     _PyObject_GC_TRACK(op);
     return (PyObject *)op;
 }
diff --git a/Objects/genobject.c b/Objects/genobject.c
index 2e74b8c..67e6ef9 100644
--- a/Objects/genobject.c
+++ b/Objects/genobject.c
@@ -15,6 +15,31 @@ gen_traverse(PyGenObject *gen, visitproc visit, void *arg)
     return 0;
 }
 
+void
+_PyGen_Finalize(PyObject *self)
+{
+    PyGenObject *gen = (PyGenObject *)self;
+    PyObject *res;
+    PyObject *error_type, *error_value, *error_traceback;
+
+    if (gen->gi_frame == NULL || gen->gi_frame->f_stacktop == NULL)
+        /* Generator isn't paused, so no need to close */
+        return;
+
+    /* Save the current exception, if any. */
+    PyErr_Fetch(&error_type, &error_value, &error_traceback);
+
+    res = gen_close(gen, NULL);
+
+    if (res == NULL)
+        PyErr_WriteUnraisable(self);
+    else
+        Py_DECREF(res);
+
+    /* Restore the saved exception. */
+    PyErr_Restore(error_type, error_value, error_traceback);
+}
+
 static void
 gen_dealloc(PyGenObject *gen)
 {
@@ -27,12 +52,8 @@ gen_dealloc(PyGenObject *gen)
 
     _PyObject_GC_TRACK(self);
 
-    if (gen->gi_frame != NULL && gen->gi_frame->f_stacktop != NULL) {
-        /* Generator is paused, so we need to close */
-        Py_TYPE(gen)->tp_del(self);
-        if (self->ob_refcnt > 0)
-            return;                     /* resurrected.  :( */
-    }
+    if (PyObject_CallFinalizerFromDealloc(self))
+        return;                     /* resurrected.  :( */
 
     _PyObject_GC_UNTRACK(self);
     Py_CLEAR(gen->gi_frame);
@@ -40,7 +61,6 @@ gen_dealloc(PyGenObject *gen)
     PyObject_GC_Del(gen);
 }
 
-
 static PyObject *
 gen_send_ex(PyGenObject *gen, PyObject *arg, int exc)
 {
@@ -76,7 +96,6 @@ gen_send_ex(PyGenObject *gen, PyObject *arg, int exc)
 
     /* Generators always return to their most recent caller, not
      * necessarily their creator. */
-    f->f_tstate = tstate;
     Py_XINCREF(tstate->frame);
     assert(f->f_back == NULL);
     f->f_back = tstate->frame;
@@ -90,8 +109,6 @@ gen_send_ex(PyGenObject *gen, PyObject *arg, int exc)
      * cycle. */
     assert(f->f_back == tstate->frame);
     Py_CLEAR(f->f_back);
-    /* Clear the borrowed reference to the thread state */
-    f->f_tstate = NULL;
 
     /* If the generator just returned (as opposed to yielding), signal
      * that the generator is exhausted. */
@@ -123,6 +140,7 @@ gen_send_ex(PyGenObject *gen, PyObject *arg, int exc)
         Py_XDECREF(t);
         Py_XDECREF(v);
         Py_XDECREF(tb);
+        gen->gi_frame->f_gen = NULL;
         gen->gi_frame = NULL;
         Py_DECREF(f);
     }
@@ -225,68 +243,6 @@ gen_close(PyGenObject *gen, PyObject *args)
     return NULL;
 }
 
-static void
-gen_del(PyObject *self)
-{
-    PyObject *res;
-    PyObject *error_type, *error_value, *error_traceback;
-    PyGenObject *gen = (PyGenObject *)self;
-
-    if (gen->gi_frame == NULL || gen->gi_frame->f_stacktop == NULL)
-        /* Generator isn't paused, so no need to close */
-        return;
-
-    /* Temporarily resurrect the object. */
-    assert(self->ob_refcnt == 0);
-    self->ob_refcnt = 1;
-
-    /* Save the current exception, if any. */
-    PyErr_Fetch(&error_type, &error_value, &error_traceback);
-
-    res = gen_close(gen, NULL);
-
-    if (res == NULL)
-        PyErr_WriteUnraisable(self);
-    else
-        Py_DECREF(res);
-
-    /* Restore the saved exception. */
-    PyErr_Restore(error_type, error_value, error_traceback);
-
-    /* Undo the temporary resurrection; can't use DECREF here, it would
-     * cause a recursive call.
-     */
-    assert(self->ob_refcnt > 0);
-    if (--self->ob_refcnt == 0)
-        return; /* this is the normal path out */
-
-    /* close() resurrected it!  Make it look like the original Py_DECREF
-     * never happened.
-     */
-    {
-        Py_ssize_t refcnt = self->ob_refcnt;
-        _Py_NewReference(self);
-        self->ob_refcnt = refcnt;
-    }
-    assert(PyType_IS_GC(Py_TYPE(self)) &&
-           _Py_AS_GC(self)->gc.gc_refs != _PyGC_REFS_UNTRACKED);
-
-    /* If Py_REF_DEBUG, _Py_NewReference bumped _Py_RefTotal, so
-     * we need to undo that. */
-    _Py_DEC_REFTOTAL;
-    /* If Py_TRACE_REFS, _Py_NewReference re-added self to the object
-     * chain, so no more to do there.
-     * If COUNT_ALLOCS, the original decref bumped tp_frees, and
-     * _Py_NewReference bumped tp_allocs:  both of those need to be
-     * undone.
-     */
-#ifdef COUNT_ALLOCS
-    --(Py_TYPE(self)->tp_frees);
-    --(Py_TYPE(self)->tp_allocs);
-#endif
-}
-
-
 
 PyDoc_STRVAR(throw_doc,
 "throw(typ[,val[,tb]]) -> raise exception in generator,\n\
@@ -440,13 +396,29 @@ _PyGen_FetchStopIterationValue(PyObject **pvalue) {
 
     if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
         PyErr_Fetch(&et, &ev, &tb);
-        Py_XDECREF(et);
-        Py_XDECREF(tb);
         if (ev) {
-            value = ((PyStopIterationObject *)ev)->value;
-            Py_INCREF(value);
-            Py_DECREF(ev);
+            /* exception will usually be normalised already */
+            if (PyObject_TypeCheck(ev, (PyTypeObject *) et)) {
+                value = ((PyStopIterationObject *)ev)->value;
+                Py_INCREF(value);
+                Py_DECREF(ev);
+            } else if (et == PyExc_StopIteration) {
+                /* avoid normalisation and take ev as value */
+                value = ev;
+            } else {
+                /* normalisation required */
+                PyErr_NormalizeException(&et, &ev, &tb);
+                if (!PyObject_TypeCheck(ev, (PyTypeObject *)PyExc_StopIteration)) {
+                    PyErr_Restore(et, ev, tb);
+                    return -1;
+                }
+                value = ((PyStopIterationObject *)ev)->value;
+                Py_INCREF(value);
+                Py_DECREF(ev);
+            }
         }
+        Py_XDECREF(et);
+        Py_XDECREF(tb);
     } else if (PyErr_Occurred()) {
         return -1;
     }
@@ -520,7 +492,8 @@ PyTypeObject PyGen_Type = {
     PyObject_GenericGetAttr,                    /* tp_getattro */
     0,                                          /* tp_setattro */
     0,                                          /* tp_as_buffer */
-    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
+        Py_TPFLAGS_HAVE_FINALIZE,               /* tp_flags */
     0,                                          /* tp_doc */
     (traverseproc)gen_traverse,                 /* tp_traverse */
     0,                                          /* tp_clear */
@@ -547,7 +520,9 @@ PyTypeObject PyGen_Type = {
     0,                                          /* tp_cache */
     0,                                          /* tp_subclasses */
     0,                                          /* tp_weaklist */
-    gen_del,                                    /* tp_del */
+    0,                                          /* tp_del */
+    0,                                          /* tp_version_tag */
+    _PyGen_Finalize,                            /* tp_finalize */
 };
 
 PyObject *
@@ -559,6 +534,7 @@ PyGen_New(PyFrameObject *f)
         return NULL;
     }
     gen->gi_frame = f;
+    f->f_gen = (PyObject *) gen;
     Py_INCREF(f->f_code);
     gen->gi_code = (PyObject *)(f->f_code);
     gen->gi_running = 0;
diff --git a/Objects/iterobject.c b/Objects/iterobject.c
index 3cfbeaf..3047d6b 100644
--- a/Objects/iterobject.c
+++ b/Objects/iterobject.c
@@ -4,7 +4,7 @@
 
 typedef struct {
     PyObject_HEAD
-    long      it_index;
+    Py_ssize_t it_index;
     PyObject *it_seq; /* Set to NULL when iterator is exhausted */
 } seqiterobject;
 
@@ -54,6 +54,11 @@ iter_iternext(PyObject *iterator)
     seq = it->it_seq;
     if (seq == NULL)
         return NULL;
+    if (it->it_index == PY_SSIZE_T_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "iter index too large");
+        return NULL;
+    }
 
     result = PySequence_GetItem(seq, it->it_index);
     if (result != NULL) {
@@ -76,9 +81,14 @@ iter_len(seqiterobject *it)
     Py_ssize_t seqsize, len;
 
     if (it->it_seq) {
-        seqsize = PySequence_Size(it->it_seq);
-        if (seqsize == -1)
-            return NULL;
+        if (_PyObject_HasLen(it->it_seq)) {
+            seqsize = PySequence_Size(it->it_seq);
+            if (seqsize == -1)
+                return NULL;
+        }
+        else {
+            Py_RETURN_NOTIMPLEMENTED;
+        }
         len = seqsize - it->it_index;
         if (len >= 0)
             return PyLong_FromSsize_t(len);
diff --git a/Objects/listobject.c b/Objects/listobject.c
index 297edf1..19967ca 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -213,11 +213,11 @@ PyList_GetItem(PyObject *op, Py_ssize_t i)
 }
 
 int
-PyList_SetItem(register PyObject *op, register Py_ssize_t i,
-               register PyObject *newitem)
+PyList_SetItem(PyObject *op, Py_ssize_t i,
+               PyObject *newitem)
 {
-    register PyObject *olditem;
-    register PyObject **p;
+    PyObject *olditem;
+    PyObject **p;
     if (!PyList_Check(op)) {
         Py_XDECREF(newitem);
         PyErr_BadInternalCall();
@@ -338,57 +338,57 @@ static PyObject *
 list_repr(PyListObject *v)
 {
     Py_ssize_t i;
-    PyObject *s = NULL;
-    _PyAccu acc;
-    static PyObject *sep = NULL;
+    PyObject *s;
+    _PyUnicodeWriter writer;
 
     if (Py_SIZE(v) == 0) {
         return PyUnicode_FromString("[]");
     }
 
-    if (sep == NULL) {
-        sep = PyUnicode_FromString(", ");
-        if (sep == NULL)
-            return NULL;
-    }
-
     i = Py_ReprEnter((PyObject*)v);
     if (i != 0) {
         return i > 0 ? PyUnicode_FromString("[...]") : NULL;
     }
 
-    if (_PyAccu_Init(&acc))
-        goto error;
+    _PyUnicodeWriter_Init(&writer);
+    writer.overallocate = 1;
+    /* "[" + "1" + ", 2" * (len - 1) + "]" */
+    writer.min_length = 1 + 1 + (2 + 1) * (Py_SIZE(v) - 1) + 1;
 
-    s = PyUnicode_FromString("[");
-    if (s == NULL || _PyAccu_Accumulate(&acc, s))
+    if (_PyUnicodeWriter_WriteChar(&writer, '[') < 0)
         goto error;
-    Py_CLEAR(s);
 
     /* Do repr() on each element.  Note that this may mutate the list,
        so must refetch the list size on each iteration. */
     for (i = 0; i < Py_SIZE(v); ++i) {
+        if (i > 0) {
+            if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0)
+                goto error;
+        }
+
         if (Py_EnterRecursiveCall(" while getting the repr of a list"))
             goto error;
         s = PyObject_Repr(v->ob_item[i]);
         Py_LeaveRecursiveCall();
-        if (i > 0 && _PyAccu_Accumulate(&acc, sep))
+        if (s == NULL)
             goto error;
-        if (s == NULL || _PyAccu_Accumulate(&acc, s))
+
+        if (_PyUnicodeWriter_WriteStr(&writer, s) < 0) {
+            Py_DECREF(s);
             goto error;
-        Py_CLEAR(s);
+        }
+        Py_DECREF(s);
     }
-    s = PyUnicode_FromString("]");
-    if (s == NULL || _PyAccu_Accumulate(&acc, s))
+
+    writer.overallocate = 0;
+    if (_PyUnicodeWriter_WriteChar(&writer, ']') < 0)
         goto error;
-    Py_CLEAR(s);
 
     Py_ReprLeave((PyObject *)v);
-    return _PyAccu_Finish(&acc);
+    return _PyUnicodeWriter_Finish(&writer);
 
 error:
-    _PyAccu_Destroy(&acc);
-    Py_XDECREF(s);
+    _PyUnicodeWriter_Dealloc(&writer);
     Py_ReprLeave((PyObject *)v);
     return NULL;
 }
@@ -644,9 +644,14 @@ list_ass_slice(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v)
     memcpy(recycle, &item[ilow], s);
 
     if (d < 0) { /* Delete -d items */
-        memmove(&item[ihigh+d], &item[ihigh],
-            (Py_SIZE(a) - ihigh)*sizeof(PyObject *));
-        list_resize(a, Py_SIZE(a) + d);
+        Py_ssize_t tail;
+        tail = (Py_SIZE(a) - ihigh) * sizeof(PyObject *);
+        memmove(&item[ihigh+d], &item[ihigh], tail);
+        if (list_resize(a, Py_SIZE(a) + d) < 0) {
+            memmove(&item[ihigh], &item[ihigh+d], tail);
+            memcpy(&item[ilow], recycle, s);
+            goto Error;
+        }
         item = a->ob_item;
     }
     else if (d > 0) { /* Insert d items */
@@ -826,7 +831,7 @@ listextend(PyListObject *self, PyObject *b)
     iternext = *it->ob_type->tp_iternext;
 
     /* Guess a result list size. */
-    n = _PyObject_LengthHint(b, 8);
+    n = PyObject_LengthHint(b, 8);
     if (n == -1) {
         Py_DECREF(it);
         return NULL;
@@ -871,8 +876,10 @@ listextend(PyListObject *self, PyObject *b)
     }
 
     /* Cut back result list if initial guess was too large. */
-    if (Py_SIZE(self) < self->allocated)
-        list_resize(self, Py_SIZE(self));  /* shrinking can't fail */
+    if (Py_SIZE(self) < self->allocated) {
+        if (list_resize(self, Py_SIZE(self)) < 0)
+            goto error;
+    }
 
     Py_DECREF(it);
     Py_RETURN_NONE;
@@ -925,17 +932,17 @@ listpop(PyListObject *self, PyObject *args)
     v = self->ob_item[i];
     if (i == Py_SIZE(self) - 1) {
         status = list_resize(self, Py_SIZE(self) - 1);
-        assert(status >= 0);
-        return v; /* and v now owns the reference the list had */
+        if (status >= 0)
+            return v; /* and v now owns the reference the list had */
+        else
+            return NULL;
     }
     Py_INCREF(v);
     status = list_ass_slice(self, i, i+1, (PyObject *)NULL);
-    assert(status >= 0);
-    /* Use status, so that in a release build compilers don't
-     * complain about the unused name.
-     */
-    (void) status;
-
+    if (status < 0) {
+        Py_DECREF(v);
+        return NULL;
+    }
     return v;
 }
 
@@ -1051,9 +1058,9 @@ sortslice_advance(sortslice *slice, Py_ssize_t n)
 static int
 binarysort(sortslice lo, PyObject **hi, PyObject **start)
 {
-    register Py_ssize_t k;
-    register PyObject **l, **p, **r;
-    register PyObject *pivot;
+    Py_ssize_t k;
+    PyObject **l, **p, **r;
+    PyObject *pivot;
 
     assert(lo.keys <= start && start <= hi);
     /* assert [lo, start) is sorted */
@@ -1825,7 +1832,8 @@ merge_collapse(MergeState *ms)
     assert(ms);
     while (ms->n > 1) {
         Py_ssize_t n = ms->n - 2;
-        if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len) {
+        if ((n > 0 && p[n-1].len <= p[n].len + p[n+1].len) ||
+            (n > 1 && p[n-2].len <= p[n-1].len + p[n].len)) {
             if (p[n-1].len < p[n+1].len)
                 --n;
             if (merge_at(ms, n) < 0)
@@ -2478,6 +2486,7 @@ list_ass_subscript(PyListObject* self, PyObject* item, PyObject* value)
             PyObject **garbage;
             size_t cur;
             Py_ssize_t i;
+            int res;
 
             if (slicelength <= 0)
                 return 0;
@@ -2528,14 +2537,14 @@ list_ass_subscript(PyListObject* self, PyObject* item, PyObject* value)
             }
 
             Py_SIZE(self) -= slicelength;
-            list_resize(self, Py_SIZE(self));
+            res = list_resize(self, Py_SIZE(self));
 
             for (i = 0; i < slicelength; i++) {
                 Py_DECREF(garbage[i]);
             }
             PyMem_FREE(garbage);
 
-            return 0;
+            return res;
         }
         else {
             /* assign slice */
@@ -2662,7 +2671,7 @@ PyTypeObject PyList_Type = {
 
 typedef struct {
     PyObject_HEAD
-    long it_index;
+    Py_ssize_t it_index;
     PyListObject *it_seq; /* Set to NULL when iterator is exhausted */
 } listiterobject;
 
@@ -2799,7 +2808,7 @@ listiter_reduce(listiterobject *it)
 static PyObject *
 listiter_setstate(listiterobject *it, PyObject *state)
 {
-    long index = PyLong_AsLong(state);
+    Py_ssize_t index = PyLong_AsSsize_t(state);
     if (index == -1 && PyErr_Occurred())
         return NULL;
     if (it->it_seq != NULL) {
@@ -2962,7 +2971,7 @@ listiter_reduce_general(void *_it, int forward)
     if (forward) {
         listiterobject *it = (listiterobject *)_it;
         if (it->it_seq)
-            return Py_BuildValue("N(O)l", _PyObject_GetBuiltin("iter"),
+            return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
                                  it->it_seq, it->it_index);
     } else {
         listreviterobject *it = (listreviterobject *)_it;
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 2245ece..3a64b53 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -17,7 +17,8 @@
 #endif
 
 /* convert a PyLong of size 1, 0 or -1 to an sdigit */
-#define MEDIUM_VALUE(x) (Py_SIZE(x) < 0 ? -(sdigit)(x)->ob_digit[0] :   \
+#define MEDIUM_VALUE(x) (assert(-1 <= Py_SIZE(x) && Py_SIZE(x) <= 1),   \
+         Py_SIZE(x) < 0 ? -(sdigit)(x)->ob_digit[0] :   \
              (Py_SIZE(x) == 0 ? (sdigit)0 :                             \
               (sdigit)(x)->ob_digit[0]))
 #define ABS(x) ((x) < 0 ? -(x) : (x))
@@ -72,11 +73,21 @@ maybe_small_long(PyLongObject *v)
 
 /* If a freshly-allocated int is already shared, it must
    be a small integer, so negating it must go to PyLong_FromLong */
-#define NEGATE(x) \
-    do if (Py_REFCNT(x) == 1) Py_SIZE(x) = -Py_SIZE(x);  \
-       else { PyObject* tmp=PyLong_FromLong(-MEDIUM_VALUE(x));  \
-           Py_DECREF(x); (x) = (PyLongObject*)tmp; }               \
-    while(0)
+Py_LOCAL_INLINE(void)
+_PyLong_Negate(PyLongObject **x_p)
+{
+    PyLongObject *x;
+
+    x = (PyLongObject *)*x_p;
+    if (Py_REFCNT(x) == 1) {
+        Py_SIZE(x) = -Py_SIZE(x);
+        return;
+    }
+
+    *x_p = (PyLongObject *)PyLong_FromLong(-MEDIUM_VALUE(x));
+    Py_DECREF(x);
+}
+
 /* For int multiplication, use the O(N**2) school algorithm unless
  * both operands contain more than KARATSUBA_CUTOFF digits (this
  * being an internal Python int digit, in base BASE).
@@ -91,11 +102,6 @@ maybe_small_long(PyLongObject *v)
  */
 #define FIVEARY_CUTOFF 8
 
-#undef MIN
-#undef MAX
-#define MAX(x, y) ((x) < (y) ? (y) : (x))
-#define MIN(x, y) ((x) > (y) ? (y) : (x))
-
 #define SIGCHECK(PyTryBlock)                    \
     do {                                        \
         if (PyErr_CheckSignals()) PyTryBlock    \
@@ -106,7 +112,7 @@ maybe_small_long(PyLongObject *v)
    of the algorithms used, this could save at most be one word anyway. */
 
 static PyLongObject *
-long_normalize(register PyLongObject *v)
+long_normalize(PyLongObject *v)
 {
     Py_ssize_t j = ABS(Py_SIZE(v));
     Py_ssize_t i = j;
@@ -155,6 +161,15 @@ _PyLong_FromNbInt(PyObject *integral)
         Py_DECREF(result);
         return NULL;
     }
+    /* Issue #17576: warn if 'result' not of exact type int. */
+    if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+            "__int__ returned non-int (type %.200s).  "
+            "The ability to return an instance of a strict subclass of int "
+            "is deprecated, and may be removed in a future version of Python.",
+            result->ob_type->tp_name)) {
+        Py_DECREF(result);
+        return NULL;
+    }
     return (PyLongObject *)result;
 }
 
@@ -377,7 +392,7 @@ long
 PyLong_AsLongAndOverflow(PyObject *vv, int *overflow)
 {
     /* This version by Tim Peters */
-    register PyLongObject *v;
+    PyLongObject *v;
     unsigned long x, prev;
     long res;
     Py_ssize_t i;
@@ -489,7 +504,7 @@ _PyLong_AsInt(PyObject *obj)
 
 Py_ssize_t
 PyLong_AsSsize_t(PyObject *vv) {
-    register PyLongObject *v;
+    PyLongObject *v;
     size_t x, prev;
     Py_ssize_t i;
     int sign;
@@ -545,7 +560,7 @@ PyLong_AsSsize_t(PyObject *vv) {
 unsigned long
 PyLong_AsUnsignedLong(PyObject *vv)
 {
-    register PyLongObject *v;
+    PyLongObject *v;
     unsigned long x, prev;
     Py_ssize_t i;
 
@@ -575,7 +590,7 @@ PyLong_AsUnsignedLong(PyObject *vv)
         x = (x << PyLong_SHIFT) | v->ob_digit[i];
         if ((x >> PyLong_SHIFT) != prev) {
             PyErr_SetString(PyExc_OverflowError,
-                            "python int too large to convert "
+                            "Python int too large to convert "
                             "to C unsigned long");
             return (unsigned long) -1;
         }
@@ -589,7 +604,7 @@ PyLong_AsUnsignedLong(PyObject *vv)
 size_t
 PyLong_AsSize_t(PyObject *vv)
 {
-    register PyLongObject *v;
+    PyLongObject *v;
     size_t x, prev;
     Py_ssize_t i;
 
@@ -632,7 +647,7 @@ PyLong_AsSize_t(PyObject *vv)
 static unsigned long
 _PyLong_AsUnsignedLongMask(PyObject *vv)
 {
-    register PyLongObject *v;
+    PyLongObject *v;
     unsigned long x;
     Py_ssize_t i;
     int sign;
@@ -660,7 +675,7 @@ _PyLong_AsUnsignedLongMask(PyObject *vv)
 }
 
 unsigned long
-PyLong_AsUnsignedLongMask(register PyObject *op)
+PyLong_AsUnsignedLongMask(PyObject *op)
 {
     PyLongObject *lo;
     unsigned long val;
@@ -975,9 +990,6 @@ PyObject *
 PyLong_FromVoidPtr(void *p)
 {
 #if SIZEOF_VOID_P <= SIZEOF_LONG
-    /* special-case null pointer */
-    if (!p)
-        return PyLong_FromLong(0);
     return PyLong_FromUnsignedLong((unsigned long)(Py_uintptr_t)p);
 #else
 
@@ -987,9 +999,6 @@ PyLong_FromVoidPtr(void *p)
 #if SIZEOF_LONG_LONG < SIZEOF_VOID_P
 #   error "PyLong_FromVoidPtr: sizeof(PY_LONG_LONG) < sizeof(void*)"
 #endif
-    /* special-case null pointer */
-    if (!p)
-        return PyLong_FromLong(0);
     return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)(Py_uintptr_t)p);
 #endif /* SIZEOF_VOID_P <= SIZEOF_LONG */
 
@@ -1035,7 +1044,6 @@ PyLong_AsVoidPtr(PyObject *vv)
  * rewritten to use the newer PyLong_{As,From}ByteArray API.
  */
 
-#define IS_LITTLE_ENDIAN (int)*(unsigned char*)&one
 #define PY_ABS_LLONG_MIN (0-(unsigned PY_LONG_LONG)PY_LLONG_MIN)
 
 /* Create a new int object from a C PY_LONG_LONG int. */
@@ -1188,7 +1196,6 @@ PyLong_AsLongLong(PyObject *vv)
 {
     PyLongObject *v;
     PY_LONG_LONG bytes;
-    int one = 1;
     int res;
     int do_decref = 0; /* if nb_int was called */
 
@@ -1220,7 +1227,7 @@ PyLong_AsLongLong(PyObject *vv)
         break;
     default:
         res = _PyLong_AsByteArray((PyLongObject *)v, (unsigned char *)&bytes,
-                                  SIZEOF_LONG_LONG, IS_LITTLE_ENDIAN, 1);
+                                  SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 1);
     }
     if (do_decref) {
         Py_DECREF(v);
@@ -1241,7 +1248,6 @@ PyLong_AsUnsignedLongLong(PyObject *vv)
 {
     PyLongObject *v;
     unsigned PY_LONG_LONG bytes;
-    int one = 1;
     int res;
 
     if (vv == NULL) {
@@ -1260,7 +1266,7 @@ PyLong_AsUnsignedLongLong(PyObject *vv)
     }
 
     res = _PyLong_AsByteArray((PyLongObject *)vv, (unsigned char *)&bytes,
-                              SIZEOF_LONG_LONG, IS_LITTLE_ENDIAN, 0);
+                              SIZEOF_LONG_LONG, PY_LITTLE_ENDIAN, 0);
 
     /* Plan 9 can't handle PY_LONG_LONG in ? : expressions */
     if (res < 0)
@@ -1275,7 +1281,7 @@ PyLong_AsUnsignedLongLong(PyObject *vv)
 static unsigned PY_LONG_LONG
 _PyLong_AsUnsignedLongLongMask(PyObject *vv)
 {
-    register PyLongObject *v;
+    PyLongObject *v;
     unsigned PY_LONG_LONG x;
     Py_ssize_t i;
     int sign;
@@ -1303,7 +1309,7 @@ _PyLong_AsUnsignedLongLongMask(PyObject *vv)
 }
 
 unsigned PY_LONG_LONG
-PyLong_AsUnsignedLongLongMask(register PyObject *op)
+PyLong_AsUnsignedLongLongMask(PyObject *op)
 {
     PyLongObject *lo;
     unsigned PY_LONG_LONG val;
@@ -1325,7 +1331,6 @@ PyLong_AsUnsignedLongLongMask(register PyObject *op)
     Py_DECREF(lo);
     return val;
 }
-#undef IS_LITTLE_ENDIAN
 
 /* Get a C long long int from an int object or any object that has an
    __int__ method.
@@ -1341,7 +1346,7 @@ PY_LONG_LONG
 PyLong_AsLongLongAndOverflow(PyObject *vv, int *overflow)
 {
     /* This version by Tim Peters */
-    register PyLongObject *v;
+    PyLongObject *v;
     unsigned PY_LONG_LONG x, prev;
     PY_LONG_LONG res;
     Py_ssize_t i;
@@ -1606,7 +1611,7 @@ long_to_decimal_string_internal(PyObject *aa,
     */
     if (size_a > PY_SSIZE_T_MAX / PyLong_SHIFT) {
         PyErr_SetString(PyExc_OverflowError,
-                        "long is too large to format");
+                        "int too large to format");
         return -1;
     }
     /* the expression size_a * PyLong_SHIFT is now safe from overflow */
@@ -1676,7 +1681,6 @@ long_to_decimal_string_internal(PyObject *aa,
         else                                                          \
             p = (TYPE*)PyUnicode_DATA(str) + strlen;                  \
                                                                       \
-        *p = '\0';                                                    \
         /* pout[0] through pout[size-2] contribute exactly            \
            _PyLong_DECIMAL_SHIFT digits each */                       \
         for (i=0; i < size - 1; i++) {                                \
@@ -1749,7 +1753,7 @@ static int
 long_format_binary(PyObject *aa, int base, int alternate,
                    PyObject **p_output, _PyUnicodeWriter *writer)
 {
-    register PyLongObject *a = (PyLongObject *)aa;
+    PyLongObject *a = (PyLongObject *)aa;
     PyObject *v;
     Py_ssize_t sz;
     Py_ssize_t size_a;
@@ -1790,7 +1794,7 @@ long_format_binary(PyObject *aa, int base, int alternate,
         /* Ensure overflow doesn't occur during computation of sz. */
         if (size_a > (PY_SSIZE_T_MAX - 3) / PyLong_SHIFT) {
             PyErr_SetString(PyExc_OverflowError,
-                            "int is too large to format");
+                            "int too large to format");
             return -1;
         }
         size_a_in_bits = (size_a - 1) * PyLong_SHIFT +
@@ -1946,10 +1950,10 @@ unsigned char _PyLong_DigitValue[256] = {
  * string characters.
  */
 static PyLongObject *
-long_from_binary_base(char **str, int base)
+long_from_binary_base(const char **str, int base)
 {
-    char *p = *str;
-    char *start = p;
+    const char *p = *str;
+    const char *start = p;
     int bits_per_char;
     Py_ssize_t n;
     PyLongObject *z;
@@ -2014,10 +2018,10 @@ long_from_binary_base(char **str, int base)
  * If unsuccessful, NULL will be returned.
  */
 PyObject *
-PyLong_FromString(char *str, char **pend, int base)
+PyLong_FromString(const char *str, char **pend, int base)
 {
     int sign = 1, error_if_nonzero = 0;
-    char *start, *orig_str = str;
+    const char *start, *orig_str = str;
     PyLongObject *z = NULL;
     PyObject *strobj;
     Py_ssize_t slen;
@@ -2146,13 +2150,13 @@ that triggers it(!).  Instead the code was tested by artificially allocating
 just 1 digit at the start, so that the copying code was exercised for every
 digit beyond the first.
 ***/
-        register twodigits c;           /* current input character */
+        twodigits c;           /* current input character */
         Py_ssize_t size_z;
         int i;
         int convwidth;
         twodigits convmultmax, convmult;
         digit *pz, *pzstop;
-        char* scan;
+        const char* scan;
 
         static double log_base_BASE[37] = {0.0e0,};
         static int convwidth_base[37] = {0,};
@@ -2280,19 +2284,19 @@ digit beyond the first.
     if (z == NULL)
         return NULL;
     if (pend != NULL)
-        *pend = str;
+        *pend = (char *)str;
     return (PyObject *) z;
 
   onError:
     if (pend != NULL)
-        *pend = str;
+        *pend = (char *)str;
     Py_XDECREF(z);
     slen = strlen(orig_str) < 200 ? strlen(orig_str) : 200;
     strobj = PyUnicode_FromStringAndSize(orig_str, slen);
     if (strobj == NULL)
         return NULL;
     PyErr_Format(PyExc_ValueError,
-                 "invalid literal for int() with base %d: %R",
+                 "invalid literal for int() with base %d: %.200R",
                  base, strobj);
     Py_DECREF(strobj);
     return NULL;
@@ -2316,7 +2320,7 @@ _PyLong_FromBytes(const char *s, Py_ssize_t len, int base)
     strobj = PyBytes_FromStringAndSize(s, Py_MIN(len, 200));
     if (strobj != NULL) {
         PyErr_Format(PyExc_ValueError,
-                     "invalid literal for int() with base %d: %R",
+                     "invalid literal for int() with base %d: %.200R",
                      base, strobj);
         Py_DECREF(strobj);
     }
@@ -2337,7 +2341,7 @@ PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base)
 PyObject *
 PyLong_FromUnicodeObject(PyObject *u, int base)
 {
-    PyObject *result, *asciidig, *strobj;
+    PyObject *result, *asciidig;
     char *buffer, *end = NULL;
     Py_ssize_t buflen;
 
@@ -2359,13 +2363,9 @@ PyLong_FromUnicodeObject(PyObject *u, int base)
         Py_DECREF(asciidig);
         Py_XDECREF(result);
     }
-    strobj = PySequence_GetSlice(u, 0, 200);
-    if (strobj != NULL) {
-        PyErr_Format(PyExc_ValueError,
-                     "invalid literal for int() with base %d: %R",
-                     base, strobj);
-        Py_DECREF(strobj);
-    }
+    PyErr_Format(PyExc_ValueError,
+                 "invalid literal for int() with base %d: %.200R",
+                 base, u);
     return NULL;
 }
 
@@ -2419,10 +2419,21 @@ long_divrem(PyLongObject *a, PyLongObject *b,
        The quotient z has the sign of a*b;
        the remainder r has the sign of a,
        so a = b*z + r. */
-    if ((Py_SIZE(a) < 0) != (Py_SIZE(b) < 0))
-        NEGATE(z);
-    if (Py_SIZE(a) < 0 && Py_SIZE(*prem) != 0)
-        NEGATE(*prem);
+    if ((Py_SIZE(a) < 0) != (Py_SIZE(b) < 0)) {
+        _PyLong_Negate(&z);
+        if (z == NULL) {
+            Py_CLEAR(*prem);
+            return -1;
+        }
+    }
+    if (Py_SIZE(a) < 0 && Py_SIZE(*prem) != 0) {
+        _PyLong_Negate(prem);
+        if (*prem == NULL) {
+            Py_DECREF(z);
+            Py_CLEAR(*prem);
+            return -1;
+        }
+    }
     *pdiv = maybe_small_long(z);
     return 0;
 }
@@ -2698,7 +2709,7 @@ PyLong_AsDouble(PyObject *v)
     x = _PyLong_Frexp((PyLongObject *)v, &exponent);
     if ((x == -1.0 && PyErr_Occurred()) || exponent > DBL_MAX_EXP) {
         PyErr_SetString(PyExc_OverflowError,
-                        "long int too large to convert to float");
+                        "int too large to convert to float");
         return -1.0;
     }
     return ldexp(x, (int)exponent);
@@ -2918,8 +2929,11 @@ x_sub(PyLongObject *a, PyLongObject *b)
         borrow &= 1; /* Keep only one sign bit */
     }
     assert(borrow == 0);
-    if (sign < 0)
-        NEGATE(z);
+    if (sign < 0) {
+        _PyLong_Negate(&z);
+        if (z == NULL)
+            return NULL;
+    }
     return long_normalize(z);
 }
 
@@ -3086,7 +3100,7 @@ kmul_split(PyLongObject *n,
     Py_ssize_t size_lo, size_hi;
     const Py_ssize_t size_n = ABS(Py_SIZE(n));
 
-    size_lo = MIN(size_n, size);
+    size_lo = Py_MIN(size_n, size);
     size_hi = size_n - size_lo;
 
     if ((hi = _PyLong_New(size_hi)) == NULL)
@@ -3357,7 +3371,7 @@ k_lopsided_mul(PyLongObject *a, PyLongObject *b)
     nbdone = 0;
     while (bsize > 0) {
         PyLongObject *product;
-        const Py_ssize_t nbtouse = MIN(bsize, asize);
+        const Py_ssize_t nbtouse = Py_MIN(bsize, asize);
 
         /* Multiply the next slice of b by a. */
         memcpy(bslice->ob_digit, b->ob_digit + nbdone,
@@ -3410,8 +3424,11 @@ long_mul(PyLongObject *a, PyLongObject *b)
 
     z = k_mul(a, b);
     /* Negate if exactly one of the inputs is negative. */
-    if (((Py_SIZE(a) ^ Py_SIZE(b)) < 0) && z)
-        NEGATE(z);
+    if (((Py_SIZE(a) ^ Py_SIZE(b)) < 0) && z) {
+        _PyLong_Negate(&z);
+        if (z == NULL)
+            return NULL;
+    }
     return (PyObject *)z;
 }
 
@@ -3648,7 +3665,7 @@ long_true_divide(PyObject *v, PyObject *w)
         goto underflow_or_zero;
 
     /* Choose value for shift; see comments for step 1 above. */
-    shift = MAX(diff, DBL_MIN_EXP) - DBL_MANT_DIG - 2;
+    shift = Py_MAX(diff, DBL_MIN_EXP) - DBL_MANT_DIG - 2;
 
     inexact = 0;
 
@@ -3719,7 +3736,7 @@ long_true_divide(PyObject *v, PyObject *w)
     x_bits = (x_size-1)*PyLong_SHIFT+bits_in_digit(x->ob_digit[x_size-1]);
 
     /* The number of extra bits that have to be rounded away. */
-    extra_bits = MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG;
+    extra_bits = Py_MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG;
     assert(extra_bits == 2 || extra_bits == 3);
 
     /* Round by directly modifying the low digit of x. */
@@ -3858,7 +3875,9 @@ long_pow(PyObject *v, PyObject *w, PyObject *x)
             Py_DECREF(c);
             c = temp;
             temp = NULL;
-            NEGATE(c);
+            _PyLong_Negate(&c);
+            if (c == NULL)
+                goto Error;
         }
 
         /* if modulus == 1:
@@ -3964,10 +3983,7 @@ long_pow(PyObject *v, PyObject *w, PyObject *x)
     goto Done;
 
   Error:
-    if (z != NULL) {
-        Py_DECREF(z);
-        z = NULL;
-    }
+    Py_CLEAR(z);
     /* fall through */
   Done:
     if (Py_SIZE(b) > FIVEARY_CUTOFF) {
@@ -4097,10 +4113,10 @@ long_lshift(PyObject *v, PyObject *w)
 
     shiftby = PyLong_AsSsize_t((PyObject *)b);
     if (shiftby == -1L && PyErr_Occurred())
-        goto lshift_error;
+        return NULL;
     if (shiftby < 0) {
         PyErr_SetString(PyExc_ValueError, "negative shift count");
-        goto lshift_error;
+        return NULL;
     }
     /* wordshift, remshift = divmod(shiftby, PyLong_SHIFT) */
     wordshift = shiftby / PyLong_SHIFT;
@@ -4112,9 +4128,11 @@ long_lshift(PyObject *v, PyObject *w)
         ++newsize;
     z = _PyLong_New(newsize);
     if (z == NULL)
-        goto lshift_error;
-    if (Py_SIZE(a) < 0)
-        NEGATE(z);
+        return NULL;
+    if (Py_SIZE(a) < 0) {
+        assert(Py_REFCNT(z) == 1);
+        Py_SIZE(z) = -Py_SIZE(z);
+    }
     for (i = 0; i < wordshift; i++)
         z->ob_digit[i] = 0;
     accum = 0;
@@ -4128,7 +4146,6 @@ long_lshift(PyObject *v, PyObject *w)
     else
         assert(!accum);
     z = long_normalize(z);
-  lshift_error:
     return (PyObject *) maybe_small_long(z);
 }
 
@@ -4153,7 +4170,7 @@ v_complement(digit *z, digit *a, Py_ssize_t m)
 
 static PyObject *
 long_bitwise(PyLongObject *a,
-             int op,  /* '&', '|', '^' */
+             char op,  /* '&', '|', '^' */
              PyLongObject *b)
 {
     int nega, negb, negz;
@@ -4328,8 +4345,7 @@ static PyObject *
 long_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
     PyObject *obase = NULL, *x = NULL;
-    long base;
-    int overflow;
+    Py_ssize_t base;
     static char *kwlist[] = {"x", "base", 0};
 
     if (type != &PyLong_Type)
@@ -4348,10 +4364,10 @@ long_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     if (obase == NULL)
         return PyNumber_Long(x);
 
-    base = PyLong_AsLongAndOverflow(obase, &overflow);
+    base = PyNumber_AsSsize_t(obase, NULL);
     if (base == -1 && PyErr_Occurred())
         return NULL;
-    if (overflow || (base != 0 && base < 2) || base > 36) {
+    if ((base != 0 && base < 2) || base > 36) {
         PyErr_SetString(PyExc_ValueError,
                         "int() base must be >= 2 and <= 36");
         return NULL;
@@ -4389,7 +4405,7 @@ long_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     tmp = (PyLongObject *)long_new(&PyLong_Type, args, kwds);
     if (tmp == NULL)
         return NULL;
-    assert(PyLong_CheckExact(tmp));
+    assert(PyLong_Check(tmp));
     n = Py_SIZE(tmp);
     if (n < 0)
         n = -n;
@@ -4432,7 +4448,7 @@ long__format__(PyObject *self, PyObject *args)
     if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
         return NULL;
 
-    _PyUnicodeWriter_Init(&writer, 0);
+    _PyUnicodeWriter_Init(&writer);
     ret = _PyLong_FormatAdvancedWriter(
         &writer,
         self,
@@ -5082,15 +5098,17 @@ _PyLong_Init(void)
             assert(v->ob_digit[0] == abs(ival));
         }
         else {
-            PyObject_INIT(v, &PyLong_Type);
+            (void)PyObject_INIT(v, &PyLong_Type);
         }
         Py_SIZE(v) = size;
         v->ob_digit[0] = abs(ival);
     }
 #endif
     /* initialize int_info */
-    if (Int_InfoType.tp_name == 0)
-        PyStructSequence_InitType(&Int_InfoType, &int_info_desc);
+    if (Int_InfoType.tp_name == NULL) {
+        if (PyStructSequence_InitType2(&Int_InfoType, &int_info_desc) < 0)
+            return 0;
+    }
 
     return 1;
 }
diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c
index 189af88..0be8493 100644
--- a/Objects/memoryobject.c
+++ b/Objects/memoryobject.c
@@ -1135,6 +1135,51 @@ get_native_fmtchar(char *result, const char *fmt)
     return -1;
 }
 
+Py_LOCAL_INLINE(char *)
+get_native_fmtstr(const char *fmt)
+{
+    int at = 0;
+
+    if (fmt[0] == '@') {
+        at = 1;
+        fmt++;
+    }
+    if (fmt[0] == '\0' || fmt[1] != '\0') {
+        return NULL;
+    }
+
+#define RETURN(s) do { return at ? "@" s : s; } while (0)
+
+    switch (fmt[0]) {
+    case 'c': RETURN("c");
+    case 'b': RETURN("b");
+    case 'B': RETURN("B");
+    case 'h': RETURN("h");
+    case 'H': RETURN("H");
+    case 'i': RETURN("i");
+    case 'I': RETURN("I");
+    case 'l': RETURN("l");
+    case 'L': RETURN("L");
+    #ifdef HAVE_LONG_LONG
+    case 'q': RETURN("q");
+    case 'Q': RETURN("Q");
+    #endif
+    case 'n': RETURN("n");
+    case 'N': RETURN("N");
+    case 'f': RETURN("f");
+    case 'd': RETURN("d");
+    #ifdef HAVE_C99_BOOL
+    case '?': RETURN("?");
+    #else
+    case '?': RETURN("?");
+    #endif
+    case 'P': RETURN("P");
+    }
+
+    return NULL;
+}
+
+
 /* Cast a memoryview's data type to 'format'. The input array must be
    C-contiguous. At least one of input-format, output-format must have
    byte size. The output array is 1-D, with the same byte length as the
@@ -1184,10 +1229,13 @@ cast_to_1D(PyMemoryViewObject *mv, PyObject *format)
         goto out;
     }
 
-    strncpy(mv->format, PyBytes_AS_STRING(asciifmt),
-            _Py_MEMORYVIEW_MAX_FORMAT);
-    mv->format[_Py_MEMORYVIEW_MAX_FORMAT-1] = '\0';
-    view->format = mv->format;
+    view->format = get_native_fmtstr(PyBytes_AS_STRING(asciifmt));
+    if (view->format == NULL) {
+        /* NOT_REACHED: get_native_fmtchar() already validates the format. */
+        PyErr_SetString(PyExc_RuntimeError,
+            "memoryview: internal error");
+        goto out;
+    }
     view->itemsize = itemsize;
 
     view->ndim = 1;
@@ -2402,7 +2450,7 @@ static PyMappingMethods memory_as_mapping = {
 
 /* As sequence */
 static PySequenceMethods memory_as_sequence = {
-        0,                                /* sq_length */
+        (lenfunc)memory_length,           /* sq_length */
         0,                                /* sq_concat */
         0,                                /* sq_repeat */
         (ssizeargfunc)memory_item,        /* sq_item */
@@ -2742,7 +2790,7 @@ memory_hash(PyMemoryViewObject *self)
         }
 
         /* Can't fail */
-        self->hash = _Py_HashBytes((unsigned char *)mem, view->len);
+        self->hash = _Py_HashBytes(mem, view->len);
 
         if (mem != view->buf)
             PyMem_Free(mem);
diff --git a/Objects/methodobject.c b/Objects/methodobject.c
index f0685dd..9b6e5e4 100644
--- a/Objects/methodobject.c
+++ b/Objects/methodobject.c
@@ -13,6 +13,15 @@ static int numfree = 0;
 #define PyCFunction_MAXFREELIST 256
 #endif
 
+/* undefine macro trampoline to PyCFunction_NewEx */
+#undef PyCFunction_New
+
+PyAPI_FUNC(PyObject *)
+PyCFunction_New(PyMethodDef *ml, PyObject *self)
+{
+    return PyCFunction_NewEx(ml, self, NULL);
+}
+
 PyObject *
 PyCFunction_NewEx(PyMethodDef *ml, PyObject *self, PyObject *module)
 {
@@ -20,7 +29,7 @@ PyCFunction_NewEx(PyMethodDef *ml, PyObject *self, PyObject *module)
     op = free_list;
     if (op != NULL) {
         free_list = (PyCFunctionObject *)(op->m_self);
-        PyObject_INIT(op, &PyCFunction_Type);
+        (void)PyObject_INIT(op, &PyCFunction_Type);
         numfree--;
     }
     else {
@@ -70,23 +79,34 @@ PyCFunction_GetFlags(PyObject *op)
 PyObject *
 PyCFunction_Call(PyObject *func, PyObject *arg, PyObject *kw)
 {
+#define CHECK_RESULT(res) assert(res != NULL || PyErr_Occurred())
+
     PyCFunctionObject* f = (PyCFunctionObject*)func;
     PyCFunction meth = PyCFunction_GET_FUNCTION(func);
     PyObject *self = PyCFunction_GET_SELF(func);
+    PyObject *res;
     Py_ssize_t size;
 
     switch (PyCFunction_GET_FLAGS(func) & ~(METH_CLASS | METH_STATIC | METH_COEXIST)) {
     case METH_VARARGS:
-        if (kw == NULL || PyDict_Size(kw) == 0)
-            return (*meth)(self, arg);
+        if (kw == NULL || PyDict_Size(kw) == 0) {
+            res = (*meth)(self, arg);
+            CHECK_RESULT(res);
+            return res;
+        }
         break;
     case METH_VARARGS | METH_KEYWORDS:
-        return (*(PyCFunctionWithKeywords)meth)(self, arg, kw);
+        res = (*(PyCFunctionWithKeywords)meth)(self, arg, kw);
+        CHECK_RESULT(res);
+        return res;
     case METH_NOARGS:
         if (kw == NULL || PyDict_Size(kw) == 0) {
             size = PyTuple_GET_SIZE(arg);
-            if (size == 0)
-                return (*meth)(self, NULL);
+            if (size == 0) {
+                res = (*meth)(self, NULL);
+                CHECK_RESULT(res);
+                return res;
+            }
             PyErr_Format(PyExc_TypeError,
                 "%.200s() takes no arguments (%zd given)",
                 f->m_ml->ml_name, size);
@@ -96,8 +116,11 @@ PyCFunction_Call(PyObject *func, PyObject *arg, PyObject *kw)
     case METH_O:
         if (kw == NULL || PyDict_Size(kw) == 0) {
             size = PyTuple_GET_SIZE(arg);
-            if (size == 1)
-                return (*meth)(self, PyTuple_GET_ITEM(arg, 0));
+            if (size == 1) {
+                res = (*meth)(self, PyTuple_GET_ITEM(arg, 0));
+                CHECK_RESULT(res);
+                return res;
+            }
             PyErr_Format(PyExc_TypeError,
                 "%.200s() takes exactly one argument (%zd given)",
                 f->m_ml->ml_name, size);
@@ -114,6 +137,8 @@ PyCFunction_Call(PyObject *func, PyObject *arg, PyObject *kw)
     PyErr_Format(PyExc_TypeError, "%.200s() takes no keyword arguments",
                  f->m_ml->ml_name);
     return NULL;
+
+#undef CHECK_RESULT
 }
 
 /* Methods (the standard built-in methods, that is) */
@@ -135,14 +160,35 @@ meth_dealloc(PyCFunctionObject *m)
 }
 
 static PyObject *
-meth_get__doc__(PyCFunctionObject *m, void *closure)
+meth_reduce(PyCFunctionObject *m)
+{
+    PyObject *builtins;
+    PyObject *getattr;
+    _Py_IDENTIFIER(getattr);
+
+    if (m->m_self == NULL || PyModule_Check(m->m_self))
+        return PyUnicode_FromString(m->m_ml->ml_name);
+
+    builtins = PyEval_GetBuiltins();
+    getattr = _PyDict_GetItemId(builtins, &PyId_getattr);
+    return Py_BuildValue("O(Os)", getattr, m->m_self, m->m_ml->ml_name);
+}
+
+static PyMethodDef meth_methods[] = {
+    {"__reduce__", (PyCFunction)meth_reduce, METH_NOARGS, NULL},
+    {NULL, NULL}
+};
+
+static PyObject *
+meth_get__text_signature__(PyCFunctionObject *m, void *closure)
 {
-    const char *doc = m->m_ml->ml_doc;
+    return _PyType_GetTextSignatureFromInternalDoc(m->m_ml->ml_name, m->m_ml->ml_doc);
+}
 
-    if (doc != NULL)
-        return PyUnicode_FromString(doc);
-    Py_INCREF(Py_None);
-    return Py_None;
+static PyObject *
+meth_get__doc__(PyCFunctionObject *m, void *closure)
+{
+    return _PyType_GetDocFromInternalDoc(m->m_ml->ml_name, m->m_ml->ml_doc);
 }
 
 static PyObject *
@@ -211,6 +257,7 @@ static PyGetSetDef meth_getsets [] = {
     {"__name__", (getter)meth_get__name__, NULL, NULL},
     {"__qualname__", (getter)meth_get__qualname__, NULL, NULL},
     {"__self__", (getter)meth_get__self__, NULL, NULL},
+    {"__text_signature__", (getter)meth_get__text_signature__, NULL, NULL},
     {0}
 };
 
@@ -308,7 +355,7 @@ PyTypeObject PyCFunction_Type = {
     0,                                          /* tp_weaklistoffset */
     0,                                          /* tp_iter */
     0,                                          /* tp_iternext */
-    0,                                          /* tp_methods */
+    meth_methods,                               /* tp_methods */
     meth_members,                               /* tp_members */
     meth_getsets,                               /* tp_getset */
     0,                                          /* tp_base */
@@ -346,17 +393,3 @@ _PyCFunction_DebugMallocStats(FILE *out)
                            "free PyCFunctionObject",
                            numfree, sizeof(PyCFunctionObject));
 }
-
-/* PyCFunction_New() is now just a macro that calls PyCFunction_NewEx(),
-   but it's part of the API so we need to keep a function around that
-   existing C extensions can call.
-*/
-
-#undef PyCFunction_New
-PyAPI_FUNC(PyObject *) PyCFunction_New(PyMethodDef *, PyObject *);
-
-PyObject *
-PyCFunction_New(PyMethodDef *ml, PyObject *self)
-{
-    return PyCFunction_NewEx(ml, self, NULL);
-}
diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c
index 72978ae..f509932 100644
--- a/Objects/moduleobject.c
+++ b/Objects/moduleobject.c
@@ -11,6 +11,8 @@ typedef struct {
     PyObject *md_dict;
     struct PyModuleDef *md_def;
     void *md_state;
+    PyObject *md_weaklist;
+    PyObject *md_name;  /* for logging purposes after md_dict is cleared */
 } PyModuleObject;
 
 static PyMemberDef module_members[] = {
@@ -26,6 +28,35 @@ static PyTypeObject moduledef_type = {
 };
 
 
+static int
+module_init_dict(PyModuleObject *mod, PyObject *md_dict,
+                 PyObject *name, PyObject *doc)
+{
+    if (md_dict == NULL)
+        return -1;
+    if (doc == NULL)
+        doc = Py_None;
+
+    if (PyDict_SetItemString(md_dict, "__name__", name) != 0)
+        return -1;
+    if (PyDict_SetItemString(md_dict, "__doc__", doc) != 0)
+        return -1;
+    if (PyDict_SetItemString(md_dict, "__package__", Py_None) != 0)
+        return -1;
+    if (PyDict_SetItemString(md_dict, "__loader__", Py_None) != 0)
+        return -1;
+    if (PyDict_SetItemString(md_dict, "__spec__", Py_None) != 0)
+        return -1;
+    if (PyUnicode_CheckExact(name)) {
+        Py_INCREF(name);
+        Py_XDECREF(mod->md_name);
+        mod->md_name = name;
+    }
+
+    return 0;
+}
+
+
 PyObject *
 PyModule_NewObject(PyObject *name)
 {
@@ -35,14 +66,10 @@ PyModule_NewObject(PyObject *name)
         return NULL;
     m->md_def = NULL;
     m->md_state = NULL;
+    m->md_weaklist = NULL;
+    m->md_name = NULL;
     m->md_dict = PyDict_New();
-    if (m->md_dict == NULL)
-        goto fail;
-    if (PyDict_SetItemString(m->md_dict, "__name__", name) != 0)
-        goto fail;
-    if (PyDict_SetItemString(m->md_dict, "__doc__", Py_None) != 0)
-        goto fail;
-    if (PyDict_SetItemString(m->md_dict, "__package__", Py_None) != 0)
+    if (module_init_dict(m, m->md_dict, name, NULL) != 0)
         goto fail;
     PyObject_GC_Track(m);
     return (PyObject *)m;
@@ -272,6 +299,14 @@ PyModule_GetState(PyObject* m)
 void
 _PyModule_Clear(PyObject *m)
 {
+    PyObject *d = ((PyModuleObject *)m)->md_dict;
+    if (d != NULL)
+        _PyModule_ClearDict(d);
+}
+
+void
+_PyModule_ClearDict(PyObject *d)
+{
     /* To make the execution order of destructors for global
        objects a bit more predictable, we first zap all objects
        whose name starts with a single underscore, before we clear
@@ -281,11 +316,6 @@ _PyModule_Clear(PyObject *m)
 
     Py_ssize_t pos;
     PyObject *key, *value;
-    PyObject *d;
-
-    d = ((PyModuleObject *)m)->md_dict;
-    if (d == NULL)
-        return;
 
     /* First, clear only names starting with a single underscore */
     pos = 0;
@@ -349,9 +379,7 @@ module_init(PyModuleObject *m, PyObject *args, PyObject *kwds)
             return -1;
         m->md_dict = dict;
     }
-    if (PyDict_SetItemString(dict, "__name__", name) < 0)
-        return -1;
-    if (PyDict_SetItemString(dict, "__doc__", doc) < 0)
+    if (module_init_dict(m, dict, name, doc) < 0)
         return -1;
     return 0;
 }
@@ -360,12 +388,15 @@ static void
 module_dealloc(PyModuleObject *m)
 {
     PyObject_GC_UnTrack(m);
+    if (Py_VerboseFlag && m->md_name) {
+        PySys_FormatStderr("# destroy %S\n", m->md_name);
+    }
+    if (m->md_weaklist != NULL)
+        PyObject_ClearWeakRefs((PyObject *) m);
     if (m->md_def && m->md_def->m_free)
         m->md_def->m_free(m);
-    if (m->md_dict != NULL) {
-        _PyModule_Clear((PyObject *)m);
-        Py_DECREF(m->md_dict);
-    }
+    Py_XDECREF(m->md_dict);
+    Py_XDECREF(m->md_name);
     if (m->md_state != NULL)
         PyMem_FREE(m->md_state);
     Py_TYPE(m)->tp_free((PyObject *)m);
@@ -374,55 +405,10 @@ module_dealloc(PyModuleObject *m)
 static PyObject *
 module_repr(PyModuleObject *m)
 {
-    PyObject *name, *filename, *repr, *loader = NULL;
+    PyThreadState *tstate = PyThreadState_GET();
+    PyInterpreterState *interp = tstate->interp;
 
-    /* See if the module has an __loader__.  If it does, give the loader the
-     * first shot at producing a repr for the module.
-     */
-    if (m->md_dict != NULL) {
-        loader = PyDict_GetItemString(m->md_dict, "__loader__");
-    }
-    if (loader != NULL) {
-        repr = PyObject_CallMethod(loader, "module_repr", "(O)",
-                                   (PyObject *)m, NULL);
-        if (repr == NULL) {
-            PyErr_Clear();
-        }
-        else {
-            return repr;
-        }
-    }
-    /* __loader__.module_repr(m) did not provide us with a repr.  Next, see if
-     * the module has an __file__.  If it doesn't then use repr(__loader__) if
-     * it exists, otherwise, just use module.__name__.
-     */
-    name = PyModule_GetNameObject((PyObject *)m);
-    if (name == NULL) {
-        PyErr_Clear();
-        name = PyUnicode_FromStringAndSize("?", 1);
-        if (name == NULL)
-            return NULL;
-    }
-    filename = PyModule_GetFilenameObject((PyObject *)m);
-    if (filename == NULL) {
-        PyErr_Clear();
-        /* There's no m.__file__, so if there was an __loader__, use that in
-         * the repr, otherwise, the only thing you can use is m.__name__
-         */
-        if (loader == NULL) {
-            repr = PyUnicode_FromFormat("<module %R>", name);
-        }
-        else {
-            repr = PyUnicode_FromFormat("<module %R (%R)>", name, loader);
-        }
-    }
-    /* Finally, use m.__file__ */
-    else {
-        repr = PyUnicode_FromFormat("<module %R from %R>", name, filename);
-        Py_DECREF(filename);
-    }
-    Py_DECREF(name);
-    return repr;
+    return PyObject_CallMethod(interp->importlib, "_module_repr", "O", m);
 }
 
 static int
@@ -511,7 +497,7 @@ PyTypeObject PyModule_Type = {
     (traverseproc)module_traverse,              /* tp_traverse */
     (inquiry)module_clear,                      /* tp_clear */
     0,                                          /* tp_richcompare */
-    0,                                          /* tp_weaklistoffset */
+    offsetof(PyModuleObject, md_weaklist),      /* tp_weaklistoffset */
     0,                                          /* tp_iter */
     0,                                          /* tp_iternext */
     module_methods,                             /* tp_methods */
diff --git a/Objects/namespaceobject.c b/Objects/namespaceobject.c
index ff278d3..3d27a95 100644
--- a/Objects/namespaceobject.c
+++ b/Objects/namespaceobject.c
@@ -44,7 +44,7 @@ namespace_init(_PyNamespaceObject *ns, PyObject *args, PyObject *kwds)
     if (args != NULL) {
         Py_ssize_t argcount = PyObject_Size(args);
         if (argcount < 0)
-            return argcount;
+            return -1;
         else if (argcount > 0) {
             PyErr_Format(PyExc_TypeError, "no positional arguments expected");
             return -1;
@@ -66,16 +66,20 @@ namespace_dealloc(_PyNamespaceObject *ns)
 
 
 static PyObject *
-namespace_repr(_PyNamespaceObject *ns)
+namespace_repr(PyObject *ns)
 {
     int i, loop_error = 0;
     PyObject *pairs = NULL, *d = NULL, *keys = NULL, *keys_iter = NULL;
     PyObject *key;
     PyObject *separator, *pairsrepr, *repr = NULL;
+    const char * name;
+
+    name = (Py_TYPE(ns) == &_PyNamespace_Type) ? "namespace"
+                                               : ns->ob_type->tp_name;
 
-    i = Py_ReprEnter((PyObject *)ns);
+    i = Py_ReprEnter(ns);
     if (i != 0) {
-        return i > 0 ? PyUnicode_FromString("namespace(...)") : NULL;
+        return i > 0 ? PyUnicode_FromFormat("%s(...)", name) : NULL;
     }
 
     pairs = PyList_New(0);
@@ -97,7 +101,7 @@ namespace_repr(_PyNamespaceObject *ns)
         goto error;
 
     while ((key = PyIter_Next(keys_iter)) != NULL) {
-        if (PyUnicode_Check(key) && PyUnicode_GET_SIZE(key) > 0) {
+        if (PyUnicode_Check(key) && PyUnicode_GET_LENGTH(key) > 0) {
             PyObject *value, *item;
 
             value = PyDict_GetItem(d, key);
@@ -127,8 +131,7 @@ namespace_repr(_PyNamespaceObject *ns)
     if (pairsrepr == NULL)
         goto error;
 
-    repr = PyUnicode_FromFormat("%s(%S)",
-                                ((PyObject *)ns)->ob_type->tp_name, pairsrepr);
+    repr = PyUnicode_FromFormat("%s(%S)", name, pairsrepr);
     Py_DECREF(pairsrepr);
 
 error:
@@ -136,7 +139,7 @@ error:
     Py_XDECREF(d);
     Py_XDECREF(keys);
     Py_XDECREF(keys_iter);
-    Py_ReprLeave((PyObject *)ns);
+    Py_ReprLeave(ns);
 
     return repr;
 }
@@ -158,14 +161,48 @@ namespace_clear(_PyNamespaceObject *ns)
 }
 
 
+static PyObject *
+namespace_richcompare(PyObject *self, PyObject *other, int op)
+{
+    if (PyObject_TypeCheck(self, &_PyNamespace_Type) &&
+        PyObject_TypeCheck(other, &_PyNamespace_Type))
+        return PyObject_RichCompare(((_PyNamespaceObject *)self)->ns_dict,
+                                   ((_PyNamespaceObject *)other)->ns_dict, op);
+    Py_RETURN_NOTIMPLEMENTED;
+}
+
+
+PyDoc_STRVAR(namespace_reduce__doc__, "Return state information for pickling");
+
+static PyObject *
+namespace_reduce(_PyNamespaceObject *ns)
+{
+    PyObject *result, *args = PyTuple_New(0);
+
+    if (!args)
+        return NULL;
+
+    result = PyTuple_Pack(3, (PyObject *)Py_TYPE(ns), args, ns->ns_dict);
+    Py_DECREF(args);
+    return result;
+}
+
+
+static PyMethodDef namespace_methods[] = {
+    {"__reduce__", (PyCFunction)namespace_reduce, METH_NOARGS,
+     namespace_reduce__doc__},
+    {NULL,         NULL}  /* sentinel */
+};
+
+
 PyDoc_STRVAR(namespace_doc,
 "A simple attribute-based namespace.\n\
 \n\
-namespace(**kwargs)");
+SimpleNamespace(**kwargs)");
 
 PyTypeObject _PyNamespace_Type = {
     PyVarObject_HEAD_INIT(&PyType_Type, 0)
-    "namespace",                                /* tp_name */
+    "types.SimpleNamespace",                    /* tp_name */
     sizeof(_PyNamespaceObject),                 /* tp_size */
     0,                                          /* tp_itemsize */
     (destructor)namespace_dealloc,              /* tp_dealloc */
@@ -188,11 +225,11 @@ PyTypeObject _PyNamespace_Type = {
     namespace_doc,                              /* tp_doc */
     (traverseproc)namespace_traverse,           /* tp_traverse */
     (inquiry)namespace_clear,                   /* tp_clear */
-    0,                                          /* tp_richcompare */
+    namespace_richcompare,                      /* tp_richcompare */
     0,                                          /* tp_weaklistoffset */
     0,                                          /* tp_iter */
     0,                                          /* tp_iternext */
-    0,                                          /* tp_methods */
+    namespace_methods,                          /* tp_methods */
     namespace_members,                          /* tp_members */
     0,                                          /* tp_getset */
     0,                                          /* tp_base */
diff --git a/Objects/object.c b/Objects/object.c
index e079d51..307e3ac 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -8,6 +8,12 @@
 extern "C" {
 #endif
 
+_Py_IDENTIFIER(Py_Repr);
+_Py_IDENTIFIER(__bytes__);
+_Py_IDENTIFIER(__dir__);
+_Py_IDENTIFIER(__isabstractmethod__);
+_Py_IDENTIFIER(builtins);
+
 #ifdef Py_REF_DEBUG
 Py_ssize_t _Py_RefTotal;
 
@@ -22,7 +28,7 @@ _Py_GetRefTotal(void)
     o = _PyDict_Dummy();
     if (o != NULL)
         total -= o->ob_refcnt;
-    o = _PySet_Dummy();
+    o = _PySet_Dummy;
     if (o != NULL)
         total -= o->ob_refcnt;
     return total;
@@ -255,6 +261,72 @@ _PyObject_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
     return PyObject_INIT_VAR(op, tp, nitems);
 }
 
+void
+PyObject_CallFinalizer(PyObject *self)
+{
+    PyTypeObject *tp = Py_TYPE(self);
+
+    /* The former could happen on heaptypes created from the C API, e.g.
+       PyType_FromSpec(). */
+    if (!PyType_HasFeature(tp, Py_TPFLAGS_HAVE_FINALIZE) ||
+        tp->tp_finalize == NULL)
+        return;
+    /* tp_finalize should only be called once. */
+    if (PyType_IS_GC(tp) && _PyGC_FINALIZED(self))
+        return;
+
+    tp->tp_finalize(self);
+    if (PyType_IS_GC(tp))
+        _PyGC_SET_FINALIZED(self, 1);
+}
+
+int
+PyObject_CallFinalizerFromDealloc(PyObject *self)
+{
+    Py_ssize_t refcnt;
+
+    /* Temporarily resurrect the object. */
+    if (self->ob_refcnt != 0) {
+        Py_FatalError("PyObject_CallFinalizerFromDealloc called on "
+                      "object with a non-zero refcount");
+    }
+    self->ob_refcnt = 1;
+
+    PyObject_CallFinalizer(self);
+
+    /* Undo the temporary resurrection; can't use DECREF here, it would
+     * cause a recursive call.
+     */
+    assert(self->ob_refcnt > 0);
+    if (--self->ob_refcnt == 0)
+        return 0;         /* this is the normal path out */
+
+    /* tp_finalize resurrected it!  Make it look like the original Py_DECREF
+     * never happened.
+     */
+    refcnt = self->ob_refcnt;
+    _Py_NewReference(self);
+    self->ob_refcnt = refcnt;
+
+    if (PyType_IS_GC(Py_TYPE(self))) {
+        assert(_PyGC_REFS(self) != _PyGC_REFS_UNTRACKED);
+    }
+    /* If Py_REF_DEBUG, _Py_NewReference bumped _Py_RefTotal, so
+     * we need to undo that. */
+    _Py_DEC_REFTOTAL;
+    /* If Py_TRACE_REFS, _Py_NewReference re-added self to the object
+     * chain, so no more to do there.
+     * If COUNT_ALLOCS, the original decref bumped tp_frees, and
+     * _Py_NewReference bumped tp_allocs:  both of those need to be
+     * undone.
+     */
+#ifdef COUNT_ALLOCS
+    --Py_TYPE(self)->tp_frees;
+    --Py_TYPE(self)->tp_allocs;
+#endif
+    return -1;
+}
+
 int
 PyObject_Print(PyObject *op, FILE *fp, int flags)
 {
@@ -340,11 +412,17 @@ _PyObject_Dump(PyObject* op)
 #ifdef WITH_THREAD
         PyGILState_STATE gil;
 #endif
+        PyObject *error_type, *error_value, *error_traceback;
+
         fprintf(stderr, "object  : ");
 #ifdef WITH_THREAD
         gil = PyGILState_Ensure();
 #endif
+
+        PyErr_Fetch(&error_type, &error_value, &error_traceback);
         (void)PyObject_Print(op, stderr, 0);
+        PyErr_Restore(error_type, error_value, error_traceback);
+
 #ifdef WITH_THREAD
         PyGILState_Release(gil);
 #endif
@@ -377,6 +455,14 @@ PyObject_Repr(PyObject *v)
     if (Py_TYPE(v)->tp_repr == NULL)
         return PyUnicode_FromFormat("<%s object at %p>",
                                     v->ob_type->tp_name, v);
+
+#ifdef Py_DEBUG
+    /* PyObject_Repr() must not be called with an exception set,
+       because it may clear it (directly or indirectly) and so the
+       caller loses its exception */
+    assert(!PyErr_Occurred());
+#endif
+
     res = (*v->ob_type->tp_repr)(v);
     if (res == NULL)
         return NULL;
@@ -419,6 +505,13 @@ PyObject_Str(PyObject *v)
     if (Py_TYPE(v)->tp_str == NULL)
         return PyObject_Repr(v);
 
+#ifdef Py_DEBUG
+    /* PyObject_Str() must not be called with an exception set,
+       because it may clear it (directly or indirectly) and so the
+       caller loses its exception */
+    assert(!PyErr_Occurred());
+#endif
+
     /* It is possible for a type to have a tp_str representation that loops
        infinitely. */
     if (Py_EnterRecursiveCall(" while getting the str of an object"))
@@ -451,6 +544,9 @@ PyObject_ASCII(PyObject *v)
     if (repr == NULL)
         return NULL;
 
+    if (PyUnicode_IS_ASCII(repr))
+        return repr;
+
     /* repr is guaranteed to be a PyUnicode object by PyObject_Repr */
     ascii = _PyUnicode_AsASCIIString(repr, "backslashreplace");
     Py_DECREF(repr);
@@ -470,7 +566,6 @@ PyObject *
 PyObject_Bytes(PyObject *v)
 {
     PyObject *result, *func;
-    _Py_IDENTIFIER(__bytes__);
 
     if (v == NULL)
         return PyBytes_FromString("<NULL>");
@@ -636,150 +731,6 @@ PyObject_RichCompareBool(PyObject *v, PyObject *w, int op)
     return ok;
 }
 
-/* Set of hash utility functions to help maintaining the invariant that
-    if a==b then hash(a)==hash(b)
-
-   All the utility functions (_Py_Hash*()) return "-1" to signify an error.
-*/
-
-/* For numeric types, the hash of a number x is based on the reduction
-   of x modulo the prime P = 2**_PyHASH_BITS - 1.  It's designed so that
-   hash(x) == hash(y) whenever x and y are numerically equal, even if
-   x and y have different types.
-
-   A quick summary of the hashing strategy:
-
-   (1) First define the 'reduction of x modulo P' for any rational
-   number x; this is a standard extension of the usual notion of
-   reduction modulo P for integers.  If x == p/q (written in lowest
-   terms), the reduction is interpreted as the reduction of p times
-   the inverse of the reduction of q, all modulo P; if q is exactly
-   divisible by P then define the reduction to be infinity.  So we've
-   got a well-defined map
-
-      reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }.
-
-   (2) Now for a rational number x, define hash(x) by:
-
-      reduce(x)   if x >= 0
-      -reduce(-x) if x < 0
-
-   If the result of the reduction is infinity (this is impossible for
-   integers, floats and Decimals) then use the predefined hash value
-   _PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
-   _PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the
-   hashes of float and Decimal infinities and nans.
-
-   A selling point for the above strategy is that it makes it possible
-   to compute hashes of decimal and binary floating-point numbers
-   efficiently, even if the exponent of the binary or decimal number
-   is large.  The key point is that
-
-      reduce(x * y) == reduce(x) * reduce(y) (modulo _PyHASH_MODULUS)
-
-   provided that {reduce(x), reduce(y)} != {0, infinity}.  The reduction of a
-   binary or decimal float is never infinity, since the denominator is a power
-   of 2 (for binary) or a divisor of a power of 10 (for decimal).  So we have,
-   for nonnegative x,
-
-      reduce(x * 2**e) == reduce(x) * reduce(2**e) % _PyHASH_MODULUS
-
-      reduce(x * 10**e) == reduce(x) * reduce(10**e) % _PyHASH_MODULUS
-
-   and reduce(10**e) can be computed efficiently by the usual modular
-   exponentiation algorithm.  For reduce(2**e) it's even better: since
-   P is of the form 2**n-1, reduce(2**e) is 2**(e mod n), and multiplication
-   by 2**(e mod n) modulo 2**n-1 just amounts to a rotation of bits.
-
-   */
-
-Py_hash_t
-_Py_HashDouble(double v)
-{
-    int e, sign;
-    double m;
-    Py_uhash_t x, y;
-
-    if (!Py_IS_FINITE(v)) {
-        if (Py_IS_INFINITY(v))
-            return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
-        else
-            return _PyHASH_NAN;
-    }
-
-    m = frexp(v, &e);
-
-    sign = 1;
-    if (m < 0) {
-        sign = -1;
-        m = -m;
-    }
-
-    /* process 28 bits at a time;  this should work well both for binary
-       and hexadecimal floating point. */
-    x = 0;
-    while (m) {
-        x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28);
-        m *= 268435456.0;  /* 2**28 */
-        e -= 28;
-        y = (Py_uhash_t)m;  /* pull out integer part */
-        m -= y;
-        x += y;
-        if (x >= _PyHASH_MODULUS)
-            x -= _PyHASH_MODULUS;
-    }
-
-    /* adjust for the exponent;  first reduce it modulo _PyHASH_BITS */
-    e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
-    x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e);
-
-    x = x * sign;
-    if (x == (Py_uhash_t)-1)
-        x = (Py_uhash_t)-2;
-    return (Py_hash_t)x;
-}
-
-Py_hash_t
-_Py_HashPointer(void *p)
-{
-    Py_hash_t x;
-    size_t y = (size_t)p;
-    /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid
-       excessive hash collisions for dicts and sets */
-    y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4));
-    x = (Py_hash_t)y;
-    if (x == -1)
-        x = -2;
-    return x;
-}
-
-Py_hash_t
-_Py_HashBytes(unsigned char *p, Py_ssize_t len)
-{
-    Py_uhash_t x;
-    Py_ssize_t i;
-
-    /*
-      We make the hash of the empty string be 0, rather than using
-      (prefix ^ suffix), since this slightly obfuscates the hash secret
-    */
-#ifdef Py_DEBUG
-    assert(_Py_HashSecret_Initialized);
-#endif
-    if (len == 0) {
-        return 0;
-    }
-    x = (Py_uhash_t) _Py_HashSecret.prefix;
-    x ^= (Py_uhash_t) *p << 7;
-    for (i = 0; i < len; i++)
-        x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++;
-    x ^= (Py_uhash_t) len;
-    x ^= (Py_uhash_t) _Py_HashSecret.suffix;
-    if (x == -1)
-        x = -2;
-    return x;
-}
-
 Py_hash_t
 PyObject_HashNotImplemented(PyObject *v)
 {
@@ -788,8 +739,6 @@ PyObject_HashNotImplemented(PyObject *v)
     return -1;
 }
 
-_Py_HashSecret_t _Py_HashSecret;
-
 Py_hash_t
 PyObject_Hash(PyObject *v)
 {
@@ -859,7 +808,6 @@ _PyObject_IsAbstract(PyObject *obj)
 {
     int res;
     PyObject* isabstract;
-    _Py_IDENTIFIER(__isabstractmethod__);
 
     if (obj == NULL)
         return 0;
@@ -1032,8 +980,12 @@ PyObject_SelfIter(PyObject *obj)
 PyObject *
 _PyObject_GetBuiltin(const char *name)
 {
-    PyObject *mod, *attr;
-    mod = PyImport_ImportModule("builtins");
+    PyObject *mod_name, *mod, *attr;
+
+    mod_name = _PyUnicode_FromId(&PyId_builtins);   /* borrowed */
+    if (mod_name == NULL)
+        return NULL;
+    mod = PyImport_Import(mod_name);
     if (mod == NULL)
         return NULL;
     attr = PyObject_GetAttrString(mod, name);
@@ -1317,12 +1269,11 @@ static PyObject *
 _dir_locals(void)
 {
     PyObject *names;
-    PyObject *locals = PyEval_GetLocals();
+    PyObject *locals;
 
-    if (locals == NULL) {
-        PyErr_SetString(PyExc_SystemError, "frame does not exist");
+    locals = PyEval_GetLocals();
+    if (locals == NULL)
         return NULL;
-    }
 
     names = PyMapping_Keys(locals);
     if (!names)
@@ -1347,7 +1298,6 @@ static PyObject *
 _dir_object(PyObject *obj)
 {
     PyObject *result, *sorted;
-    _Py_IDENTIFIER(__dir__);
     PyObject *dirfunc = _PyObject_LookupSpecial(obj, &PyId___dir__);
 
     assert(obj);
@@ -1515,6 +1465,17 @@ NotImplemented_repr(PyObject *op)
 }
 
 static PyObject *
+NotImplemented_reduce(PyObject *op)
+{
+    return PyUnicode_FromString("NotImplemented");
+}
+
+static PyMethodDef notimplemented_methods[] = {
+    {"__reduce__", (PyCFunction)NotImplemented_reduce, METH_NOARGS, NULL},
+    {NULL, NULL}
+};
+
+static PyObject *
 notimplemented_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
 {
     if (PyTuple_GET_SIZE(args) || (kwargs && PyDict_Size(kwargs))) {
@@ -1524,12 +1485,21 @@ notimplemented_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
     Py_RETURN_NOTIMPLEMENTED;
 }
 
+static void
+notimplemented_dealloc(PyObject* ignore)
+{
+    /* This should never get called, but we also don't want to SEGV if
+     * we accidentally decref NotImplemented out of existence.
+     */
+    Py_FatalError("deallocating NotImplemented");
+}
+
 PyTypeObject _PyNotImplemented_Type = {
     PyVarObject_HEAD_INIT(&PyType_Type, 0)
     "NotImplementedType",
     0,
     0,
-    none_dealloc,       /*tp_dealloc*/ /*never called*/
+    notimplemented_dealloc,       /*tp_dealloc*/ /*never called*/
     0,                  /*tp_print*/
     0,                  /*tp_getattr*/
     0,                  /*tp_setattr*/
@@ -1552,7 +1522,7 @@ PyTypeObject _PyNotImplemented_Type = {
     0,                  /*tp_weaklistoffset */
     0,                  /*tp_iter */
     0,                  /*tp_iternext */
-    0,                  /*tp_methods */
+    notimplemented_methods, /*tp_methods */
     0,                  /*tp_members */
     0,                  /*tp_getset */
     0,                  /*tp_base */
@@ -1699,15 +1669,6 @@ _Py_ReadyTypes(void)
     if (PyType_Ready(&PyMemberDescr_Type) < 0)
         Py_FatalError("Can't initialize member descriptor type");
 
-    if (PyType_Ready(&PyFilter_Type) < 0)
-        Py_FatalError("Can't initialize filter type");
-
-    if (PyType_Ready(&PyMap_Type) < 0)
-        Py_FatalError("Can't initialize map type");
-
-    if (PyType_Ready(&PyZip_Type) < 0)
-        Py_FatalError("Can't initialize zip type");
-
     if (PyType_Ready(&_PyNamespace_Type) < 0)
         Py_FatalError("Can't initialize namespace type");
 
@@ -1749,10 +1710,10 @@ _Py_NewReference(PyObject *op)
 }
 
 void
-_Py_ForgetReference(register PyObject *op)
+_Py_ForgetReference(PyObject *op)
 {
 #ifdef SLOW_UNREF_CHECK
-    register PyObject *p;
+    PyObject *p;
 #endif
     if (op->ob_refcnt < 0)
         Py_FatalError("UNREF negative refcnt");
@@ -1856,26 +1817,6 @@ PyTypeObject *_PyCapsule_hack = &PyCapsule_Type;
 Py_ssize_t (*_Py_abstract_hack)(PyObject *) = PyObject_Size;
 
 
-/* Python's malloc wrappers (see pymem.h) */
-
-void *
-PyMem_Malloc(size_t nbytes)
-{
-    return PyMem_MALLOC(nbytes);
-}
-
-void *
-PyMem_Realloc(void *p, size_t nbytes)
-{
-    return PyMem_REALLOC(p, nbytes);
-}
-
-void
-PyMem_Free(void *p)
-{
-    PyMem_FREE(p);
-}
-
 void
 _PyObject_DebugTypeStats(FILE *out)
 {
@@ -1885,7 +1826,6 @@ _PyObject_DebugTypeStats(FILE *out)
     _PyFrame_DebugMallocStats(out);
     _PyList_DebugMallocStats(out);
     _PyMethod_DebugMallocStats(out);
-    _PySet_DebugMallocStats(out);
     _PyTuple_DebugMallocStats(out);
 }
 
@@ -1901,8 +1841,6 @@ _PyObject_DebugTypeStats(FILE *out)
    See dictobject.c and listobject.c for examples of use.
 */
 
-#define KEY "Py_Repr"
-
 int
 Py_ReprEnter(PyObject *obj)
 {
@@ -1911,14 +1849,16 @@ Py_ReprEnter(PyObject *obj)
     Py_ssize_t i;
 
     dict = PyThreadState_GetDict();
+    /* Ignore a missing thread-state, so that this function can be called
+       early on startup. */
     if (dict == NULL)
         return 0;
-    list = PyDict_GetItemString(dict, KEY);
+    list = _PyDict_GetItemId(dict, &PyId_Py_Repr);
     if (list == NULL) {
         list = PyList_New(0);
         if (list == NULL)
             return -1;
-        if (PyDict_SetItemString(dict, KEY, list) < 0)
+        if (_PyDict_SetItemId(dict, &PyId_Py_Repr, list) < 0)
             return -1;
         Py_DECREF(list);
     }
@@ -1927,7 +1867,8 @@ Py_ReprEnter(PyObject *obj)
         if (PyList_GET_ITEM(list, i) == obj)
             return 1;
     }
-    PyList_Append(list, obj);
+    if (PyList_Append(list, obj) < 0)
+        return -1;
     return 0;
 }
 
@@ -1937,13 +1878,18 @@ Py_ReprLeave(PyObject *obj)
     PyObject *dict;
     PyObject *list;
     Py_ssize_t i;
+    PyObject *error_type, *error_value, *error_traceback;
+
+    PyErr_Fetch(&error_type, &error_value, &error_traceback);
 
     dict = PyThreadState_GetDict();
     if (dict == NULL)
-        return;
-    list = PyDict_GetItemString(dict, KEY);
+        goto finally;
+
+    list = _PyDict_GetItemId(dict, &PyId_Py_Repr);
     if (list == NULL || !PyList_Check(list))
-        return;
+        goto finally;
+
     i = PyList_GET_SIZE(list);
     /* Count backwards because we always expect obj to be list[-1] */
     while (--i >= 0) {
@@ -1952,6 +1898,10 @@ Py_ReprLeave(PyObject *obj)
             break;
         }
     }
+
+finally:
+    /* ignore exceptions because there is no way to report them. */
+    PyErr_Restore(error_type, error_value, error_traceback);
 }
 
 /* Trashcan support. */
@@ -1972,7 +1922,7 @@ void
 _PyTrash_deposit_object(PyObject *op)
 {
     assert(PyObject_IS_GC(op));
-    assert(_Py_AS_GC(op)->gc.gc_refs == _PyGC_REFS_UNTRACKED);
+    assert(_PyGC_REFS(op) == _PyGC_REFS_UNTRACKED);
     assert(op->ob_refcnt == 0);
     _Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *)_PyTrash_delete_later;
     _PyTrash_delete_later = op;
@@ -1984,7 +1934,7 @@ _PyTrash_thread_deposit_object(PyObject *op)
 {
     PyThreadState *tstate = PyThreadState_GET();
     assert(PyObject_IS_GC(op));
-    assert(_Py_AS_GC(op)->gc.gc_refs == _PyGC_REFS_UNTRACKED);
+    assert(_PyGC_REFS(op) == _PyGC_REFS_UNTRACKED);
     assert(op->ob_refcnt == 0);
     _Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *) tstate->trash_delete_later;
     tstate->trash_delete_later = op;
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index c261b68..3c33255 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -1,14 +1,374 @@
 #include "Python.h"
 
-#ifdef WITH_PYMALLOC
+/* Python's malloc wrappers (see pymem.h) */
+
+#ifdef PYMALLOC_DEBUG   /* WITH_PYMALLOC && PYMALLOC_DEBUG */
+/* Forward declarations */
+static void* _PyMem_DebugMalloc(void *ctx, size_t size);
+static void _PyMem_DebugFree(void *ctx, void *p);
+static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
+
+static void _PyObject_DebugDumpAddress(const void *p);
+static void _PyMem_DebugCheckAddress(char api_id, const void *p);
+#endif
 
-#ifdef HAVE_MMAP
- #include <sys/mman.h>
- #ifdef MAP_ANONYMOUS
-  #define ARENAS_USE_MMAP
+#if defined(__has_feature)  /* Clang */
+ #if __has_feature(address_sanitizer)  /* is ASAN enabled? */
+  #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \
+        __attribute__((no_address_safety_analysis)) \
+        __attribute__ ((noinline))
+ #else
+  #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
  #endif
+#else
+ #if defined(__SANITIZE_ADDRESS__)  /* GCC 4.8.x, is ASAN enabled? */
+  #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \
+        __attribute__((no_address_safety_analysis)) \
+        __attribute__ ((noinline))
+ #else
+  #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
+ #endif
+#endif
+
+#ifdef WITH_PYMALLOC
+
+#ifdef MS_WINDOWS
+#  include <windows.h>
+#elif defined(HAVE_MMAP)
+#  include <sys/mman.h>
+#  ifdef MAP_ANONYMOUS
+#    define ARENAS_USE_MMAP
+#  endif
+#endif
+
+/* Forward declarations */
+static void* _PyObject_Malloc(void *ctx, size_t size);
+static void _PyObject_Free(void *ctx, void *p);
+static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
 #endif
 
+
+static void *
+_PyMem_RawMalloc(void *ctx, size_t size)
+{
+    /* PyMem_Malloc(0) means malloc(1). Some systems would return NULL
+       for malloc(0), which would be treated as an error. Some platforms would
+       return a pointer with no memory behind it, which would break pymalloc.
+       To solve these problems, allocate an extra byte. */
+    if (size == 0)
+        size = 1;
+    return malloc(size);
+}
+
+static void *
+_PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
+{
+    if (size == 0)
+        size = 1;
+    return realloc(ptr, size);
+}
+
+static void
+_PyMem_RawFree(void *ctx, void *ptr)
+{
+    free(ptr);
+}
+
+
+#ifdef MS_WINDOWS
+static void *
+_PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
+{
+    return VirtualAlloc(NULL, size,
+                        MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
+}
+
+static void
+_PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
+{
+    VirtualFree(ptr, 0, MEM_RELEASE);
+}
+
+#elif defined(ARENAS_USE_MMAP)
+static void *
+_PyObject_ArenaMmap(void *ctx, size_t size)
+{
+    void *ptr;
+    ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
+               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+    if (ptr == MAP_FAILED)
+        return NULL;
+    assert(ptr != NULL);
+    return ptr;
+}
+
+static void
+_PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
+{
+    munmap(ptr, size);
+}
+
+#else
+static void *
+_PyObject_ArenaMalloc(void *ctx, size_t size)
+{
+    return malloc(size);
+}
+
+static void
+_PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
+{
+    free(ptr);
+}
+#endif
+
+
+#define PYRAW_FUNCS _PyMem_RawMalloc, _PyMem_RawRealloc, _PyMem_RawFree
+#ifdef WITH_PYMALLOC
+#  define PYOBJ_FUNCS _PyObject_Malloc, _PyObject_Realloc, _PyObject_Free
+#else
+#  define PYOBJ_FUNCS PYRAW_FUNCS
+#endif
+#define PYMEM_FUNCS PYRAW_FUNCS
+
+#ifdef PYMALLOC_DEBUG
+typedef struct {
+    /* We tag each block with an API ID in order to detect API violations */
+    char api_id;
+    PyMemAllocator alloc;
+} debug_alloc_api_t;
+static struct {
+    debug_alloc_api_t raw;
+    debug_alloc_api_t mem;
+    debug_alloc_api_t obj;
+} _PyMem_Debug = {
+    {'r', {NULL, PYRAW_FUNCS}},
+    {'m', {NULL, PYMEM_FUNCS}},
+    {'o', {NULL, PYOBJ_FUNCS}}
+    };
+
+#define PYDBG_FUNCS _PyMem_DebugMalloc, _PyMem_DebugRealloc, _PyMem_DebugFree
+#endif
+
+static PyMemAllocator _PyMem_Raw = {
+#ifdef PYMALLOC_DEBUG
+    &_PyMem_Debug.raw, PYDBG_FUNCS
+#else
+    NULL, PYRAW_FUNCS
+#endif
+    };
+
+static PyMemAllocator _PyMem = {
+#ifdef PYMALLOC_DEBUG
+    &_PyMem_Debug.mem, PYDBG_FUNCS
+#else
+    NULL, PYMEM_FUNCS
+#endif
+    };
+
+static PyMemAllocator _PyObject = {
+#ifdef PYMALLOC_DEBUG
+    &_PyMem_Debug.obj, PYDBG_FUNCS
+#else
+    NULL, PYOBJ_FUNCS
+#endif
+    };
+
+#undef PYRAW_FUNCS
+#undef PYMEM_FUNCS
+#undef PYOBJ_FUNCS
+#undef PYDBG_FUNCS
+
+static PyObjectArenaAllocator _PyObject_Arena = {NULL,
+#ifdef MS_WINDOWS
+    _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
+#elif defined(ARENAS_USE_MMAP)
+    _PyObject_ArenaMmap, _PyObject_ArenaMunmap
+#else
+    _PyObject_ArenaMalloc, _PyObject_ArenaFree
+#endif
+    };
+
+void
+PyMem_SetupDebugHooks(void)
+{
+#ifdef PYMALLOC_DEBUG
+    PyMemAllocator alloc;
+
+    alloc.malloc = _PyMem_DebugMalloc;
+    alloc.realloc = _PyMem_DebugRealloc;
+    alloc.free = _PyMem_DebugFree;
+
+    if (_PyMem_Raw.malloc != _PyMem_DebugMalloc) {
+        alloc.ctx = &_PyMem_Debug.raw;
+        PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
+        PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
+    }
+
+    if (_PyMem.malloc != _PyMem_DebugMalloc) {
+        alloc.ctx = &_PyMem_Debug.mem;
+        PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
+        PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
+    }
+
+    if (_PyObject.malloc != _PyMem_DebugMalloc) {
+        alloc.ctx = &_PyMem_Debug.obj;
+        PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
+        PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
+    }
+#endif
+}
+
+void
+PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocator *allocator)
+{
+    switch(domain)
+    {
+    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
+    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
+    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
+    default:
+        /* unknown domain */
+        allocator->ctx = NULL;
+        allocator->malloc = NULL;
+        allocator->realloc = NULL;
+        allocator->free = NULL;
+    }
+}
+
+void
+PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocator *allocator)
+{
+    switch(domain)
+    {
+    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
+    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
+    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
+    /* ignore unknown domain */
+    }
+
+}
+
+void
+PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
+{
+    *allocator = _PyObject_Arena;
+}
+
+void
+PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
+{
+    _PyObject_Arena = *allocator;
+}
+
+void *
+PyMem_RawMalloc(size_t size)
+{
+    /*
+     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
+     * Most python internals blindly use a signed Py_ssize_t to track
+     * things without checking for overflows or negatives.
+     * As size_t is unsigned, checking for size < 0 is not required.
+     */
+    if (size > (size_t)PY_SSIZE_T_MAX)
+        return NULL;
+
+    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
+}
+
+void*
+PyMem_RawRealloc(void *ptr, size_t new_size)
+{
+    /* see PyMem_RawMalloc() */
+    if (new_size > (size_t)PY_SSIZE_T_MAX)
+        return NULL;
+    return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
+}
+
+void PyMem_RawFree(void *ptr)
+{
+    _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
+}
+
+void *
+PyMem_Malloc(size_t size)
+{
+    /* see PyMem_RawMalloc() */
+    if (size > (size_t)PY_SSIZE_T_MAX)
+        return NULL;
+    return _PyMem.malloc(_PyMem.ctx, size);
+}
+
+void *
+PyMem_Realloc(void *ptr, size_t new_size)
+{
+    /* see PyMem_RawMalloc() */
+    if (new_size > (size_t)PY_SSIZE_T_MAX)
+        return NULL;
+    return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
+}
+
+void
+PyMem_Free(void *ptr)
+{
+    _PyMem.free(_PyMem.ctx, ptr);
+}
+
+char *
+_PyMem_RawStrdup(const char *str)
+{
+    size_t size;
+    char *copy;
+
+    size = strlen(str) + 1;
+    copy = PyMem_RawMalloc(size);
+    if (copy == NULL)
+        return NULL;
+    memcpy(copy, str, size);
+    return copy;
+}
+
+char *
+_PyMem_Strdup(const char *str)
+{
+    size_t size;
+    char *copy;
+
+    size = strlen(str) + 1;
+    copy = PyMem_Malloc(size);
+    if (copy == NULL)
+        return NULL;
+    memcpy(copy, str, size);
+    return copy;
+}
+
+void *
+PyObject_Malloc(size_t size)
+{
+    /* see PyMem_RawMalloc() */
+    if (size > (size_t)PY_SSIZE_T_MAX)
+        return NULL;
+    return _PyObject.malloc(_PyObject.ctx, size);
+}
+
+void *
+PyObject_Realloc(void *ptr, size_t new_size)
+{
+    /* see PyMem_RawMalloc() */
+    if (new_size > (size_t)PY_SSIZE_T_MAX)
+        return NULL;
+    return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
+}
+
+void
+PyObject_Free(void *ptr)
+{
+    _PyObject.free(_PyObject.ctx, ptr);
+}
+
+
+#ifdef WITH_PYMALLOC
+
 #ifdef WITH_VALGRIND
 #include <valgrind/valgrind.h>
 
@@ -525,6 +885,15 @@ static size_t ntimes_arena_allocated = 0;
 /* High water mark (max value ever seen) for narenas_currently_allocated. */
 static size_t narenas_highwater = 0;
 
+static Py_ssize_t _Py_AllocatedBlocks = 0;
+
+Py_ssize_t
+_Py_GetAllocatedBlocks(void)
+{
+    return _Py_AllocatedBlocks;
+}
+
+
 /* Allocate a new arena.  If we run out of memory, return NULL.  Else
  * allocate a new arena, and return the address of an arena_object
  * describing the new arena.  It's expected that the caller will set
@@ -536,7 +905,6 @@ new_arena(void)
     struct arena_object* arenaobj;
     uint excess;        /* number of bytes above pool alignment */
     void *address;
-    int err;
 
 #ifdef PYMALLOC_DEBUG
     if (Py_GETENV("PYTHONMALLOCSTATS"))
@@ -558,7 +926,7 @@ new_arena(void)
             return NULL;                /* overflow */
 #endif
         nbytes = numarenas * sizeof(*arenas);
-        arenaobj = (struct arena_object *)realloc(arenas, nbytes);
+        arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
         if (arenaobj == NULL)
             return NULL;
         arenas = arenaobj;
@@ -589,15 +957,8 @@ new_arena(void)
     arenaobj = unused_arena_objects;
     unused_arena_objects = arenaobj->nextarena;
     assert(arenaobj->address == 0);
-#ifdef ARENAS_USE_MMAP
-    address = mmap(NULL, ARENA_SIZE, PROT_READ|PROT_WRITE,
-                                   MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
-    err = (address == MAP_FAILED);
-#else
-    address = malloc(ARENA_SIZE);
-    err = (address == 0);
-#endif
-    if (err) {
+    address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
+    if (address == NULL) {
         /* The allocation failed: return NULL after putting the
          * arenaobj back.
          */
@@ -760,15 +1121,16 @@ int Py_ADDRESS_IN_RANGE(void *P, poolp pool) Py_NO_INLINE;
  * Unless the optimizer reorders everything, being too smart...
  */
 
-#undef PyObject_Malloc
-void *
-PyObject_Malloc(size_t nbytes)
+static void *
+_PyObject_Malloc(void *ctx, size_t nbytes)
 {
     block *bp;
     poolp pool;
     poolp next;
     uint size;
 
+    _Py_AllocatedBlocks++;
+
 #ifdef WITH_VALGRIND
     if (UNLIKELY(running_on_valgrind == -1))
         running_on_valgrind = RUNNING_ON_VALGRIND;
@@ -777,15 +1139,6 @@ PyObject_Malloc(size_t nbytes)
 #endif
 
     /*
-     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
-     * Most python internals blindly use a signed Py_ssize_t to track
-     * things without checking for overflows or negatives.
-     * As size_t is unsigned, checking for nbytes < 0 is not required.
-     */
-    if (nbytes > PY_SSIZE_T_MAX)
-        return NULL;
-
-    /*
      * This implicitly redirects malloc(0).
      */
     if ((nbytes - 1) < SMALL_REQUEST_THRESHOLD) {
@@ -901,6 +1254,7 @@ PyObject_Malloc(size_t nbytes)
                  * and free list are already initialized.
                  */
                 bp = pool->freeblock;
+                assert(bp != NULL);
                 pool->freeblock = *(block **)bp;
                 UNLOCK();
                 return (void *)bp;
@@ -956,16 +1310,19 @@ redirect:
      * last chance to serve the request) or when the max memory limit
      * has been reached.
      */
-    if (nbytes == 0)
-        nbytes = 1;
-    return (void *)malloc(nbytes);
+    {
+        void *result = PyMem_RawMalloc(nbytes);
+        if (!result)
+            _Py_AllocatedBlocks--;
+        return result;
+    }
 }
 
 /* free */
 
-#undef PyObject_Free
-void
-PyObject_Free(void *p)
+ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
+static void
+_PyObject_Free(void *ctx, void *p)
 {
     poolp pool;
     block *lastfree;
@@ -978,6 +1335,8 @@ PyObject_Free(void *p)
     if (p == NULL)      /* free(NULL) has no effect */
         return;
 
+    _Py_AllocatedBlocks--;
+
 #ifdef WITH_VALGRIND
     if (UNLIKELY(running_on_valgrind > 0))
         goto redirect;
@@ -1072,11 +1431,8 @@ PyObject_Free(void *p)
                 unused_arena_objects = ao;
 
                 /* Free the entire arena. */
-#ifdef ARENAS_USE_MMAP
-                munmap((void *)ao->address, ARENA_SIZE);
-#else
-                free((void *)ao->address);
-#endif
+                _PyObject_Arena.free(_PyObject_Arena.ctx,
+                                     (void *)ao->address, ARENA_SIZE);
                 ao->address = 0;                        /* mark unassociated */
                 --narenas_currently_allocated;
 
@@ -1185,7 +1541,7 @@ PyObject_Free(void *p)
 redirect:
 #endif
     /* We didn't allocate this address. */
-    free(p);
+    PyMem_RawFree(p);
 }
 
 /* realloc.  If p is NULL, this acts like malloc(nbytes).  Else if nbytes==0,
@@ -1193,9 +1549,9 @@ redirect:
  * return a non-NULL result.
  */
 
-#undef PyObject_Realloc
-void *
-PyObject_Realloc(void *p, size_t nbytes)
+ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
+static void *
+_PyObject_Realloc(void *ctx, void *p, size_t nbytes)
 {
     void *bp;
     poolp pool;
@@ -1205,16 +1561,7 @@ PyObject_Realloc(void *p, size_t nbytes)
 #endif
 
     if (p == NULL)
-        return PyObject_Malloc(nbytes);
-
-    /*
-     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
-     * Most python internals blindly use a signed Py_ssize_t to track
-     * things without checking for overflows or negatives.
-     * As size_t is unsigned, checking for nbytes < 0 is not required.
-     */
-    if (nbytes > PY_SSIZE_T_MAX)
-        return NULL;
+        return _PyObject_Malloc(ctx, nbytes);
 
 #ifdef WITH_VALGRIND
     /* Treat running_on_valgrind == -1 the same as 0 */
@@ -1242,10 +1589,10 @@ PyObject_Realloc(void *p, size_t nbytes)
             }
             size = nbytes;
         }
-        bp = PyObject_Malloc(nbytes);
+        bp = _PyObject_Malloc(ctx, nbytes);
         if (bp != NULL) {
             memcpy(bp, p, size);
-            PyObject_Free(p);
+            _PyObject_Free(ctx, p);
         }
         return bp;
     }
@@ -1263,14 +1610,14 @@ PyObject_Realloc(void *p, size_t nbytes)
      * at p.  Instead we punt:  let C continue to manage this block.
      */
     if (nbytes)
-        return realloc(p, nbytes);
+        return PyMem_RawRealloc(p, nbytes);
     /* C doesn't define the result of realloc(p, 0) (it may or may not
      * return NULL then), but Python's docs promise that nbytes==0 never
      * returns NULL.  We don't pass 0 to realloc(), to avoid that endcase
      * to begin with.  Even then, we can't be sure that realloc() won't
      * return NULL.
      */
-    bp = realloc(p, 1);
+    bp = PyMem_RawRealloc(p, 1);
     return bp ? bp : p;
 }
 
@@ -1280,23 +1627,12 @@ PyObject_Realloc(void *p, size_t nbytes)
 /* pymalloc not enabled:  Redirect the entry points to malloc.  These will
  * only be used by extensions that are compiled with pymalloc enabled. */
 
-void *
-PyObject_Malloc(size_t n)
-{
-    return PyMem_MALLOC(n);
-}
-
-void *
-PyObject_Realloc(void *p, size_t n)
+Py_ssize_t
+_Py_GetAllocatedBlocks(void)
 {
-    return PyMem_REALLOC(p, n);
+    return 0;
 }
 
-void
-PyObject_Free(void *p)
-{
-    PyMem_FREE(p);
-}
 #endif /* WITH_PYMALLOC */
 
 #ifdef PYMALLOC_DEBUG
@@ -1316,10 +1652,6 @@ PyObject_Free(void *p)
 #define DEADBYTE       0xDB    /* dead (newly freed) memory */
 #define FORBIDDENBYTE  0xFB    /* untouchable bytes at each end of a block */
 
-/* We tag each block with an API ID in order to tag API violations */
-#define _PYMALLOC_MEM_ID 'm'   /* the PyMem_Malloc() API */
-#define _PYMALLOC_OBJ_ID 'o'   /* The PyObject_Malloc() API */
-
 static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
 
 /* serialno is always incremented via calling this routine.  The point is
@@ -1392,7 +1724,9 @@ pool_is_in_list(const poolp target, poolp list)
 p[0: S]
     Number of bytes originally asked for.  This is a size_t, big-endian (easier
     to read in a memory dump).
-p[S: 2*S]
+p[S]
+    API ID.  See PEP 445.  A single character: 'r' (raw), 'm' (mem) or 'o' (obj).
+p[S+1: 2*S]
     Copies of FORBIDDENBYTE.  Used to catch under- writes and reads.
 p[2*S: 2*S+n]
     The requested memory, filled with copies of CLEANBYTE.
@@ -1402,76 +1736,36 @@ p[2*S: 2*S+n]
 p[2*S+n: 2*S+n+S]
     Copies of FORBIDDENBYTE.  Used to catch over- writes and reads.
 p[2*S+n+S: 2*S+n+2*S]
-    A serial number, incremented by 1 on each call to _PyObject_DebugMalloc
-    and _PyObject_DebugRealloc.
+    A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
+    and _PyMem_DebugRealloc.
     This is a big-endian size_t.
     If "bad memory" is detected later, the serial number gives an
     excellent way to set a breakpoint on the next run, to capture the
     instant at which this block was passed out.
 */
 
-/* debug replacements for the PyMem_* memory API */
-void *
-_PyMem_DebugMalloc(size_t nbytes)
-{
-    return _PyObject_DebugMallocApi(_PYMALLOC_MEM_ID, nbytes);
-}
-void *
-_PyMem_DebugRealloc(void *p, size_t nbytes)
-{
-    return _PyObject_DebugReallocApi(_PYMALLOC_MEM_ID, p, nbytes);
-}
-void
-_PyMem_DebugFree(void *p)
-{
-    _PyObject_DebugFreeApi(_PYMALLOC_MEM_ID, p);
-}
-
-/* debug replacements for the PyObject_* memory API */
-void *
-_PyObject_DebugMalloc(size_t nbytes)
-{
-    return _PyObject_DebugMallocApi(_PYMALLOC_OBJ_ID, nbytes);
-}
-void *
-_PyObject_DebugRealloc(void *p, size_t nbytes)
-{
-    return _PyObject_DebugReallocApi(_PYMALLOC_OBJ_ID, p, nbytes);
-}
-void
-_PyObject_DebugFree(void *p)
-{
-    _PyObject_DebugFreeApi(_PYMALLOC_OBJ_ID, p);
-}
-void
-_PyObject_DebugCheckAddress(const void *p)
-{
-    _PyObject_DebugCheckAddressApi(_PYMALLOC_OBJ_ID, p);
-}
-
-
-/* generic debug memory api, with an "id" to identify the API in use */
-void *
-_PyObject_DebugMallocApi(char id, size_t nbytes)
+static void *
+_PyMem_DebugMalloc(void *ctx, size_t nbytes)
 {
+    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
     uchar *p;           /* base address of malloc'ed block */
     uchar *tail;        /* p + 2*SST + nbytes == pointer to tail pad bytes */
     size_t total;       /* nbytes + 4*SST */
 
     bumpserialno();
     total = nbytes + 4*SST;
-    if (total < nbytes)
-        /* overflow:  can't represent total as a size_t */
+    if (nbytes > PY_SSIZE_T_MAX - 4*SST)
+        /* overflow:  can't represent total as a Py_ssize_t */
         return NULL;
 
-    p = (uchar *)PyObject_Malloc(total);
+    p = (uchar *)api->alloc.malloc(api->alloc.ctx, total);
     if (p == NULL)
         return NULL;
 
     /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
     write_size_t(p, nbytes);
-    p[SST] = (uchar)id;
-    memset(p + SST + 1 , FORBIDDENBYTE, SST-1);
+    p[SST] = (uchar)api->api_id;
+    memset(p + SST + 1, FORBIDDENBYTE, SST-1);
 
     if (nbytes > 0)
         memset(p + 2*SST, CLEANBYTE, nbytes);
@@ -1489,60 +1783,64 @@ _PyObject_DebugMallocApi(char id, size_t nbytes)
    Then fills the original bytes with DEADBYTE.
    Then calls the underlying free.
 */
-void
-_PyObject_DebugFreeApi(char api, void *p)
+static void
+_PyMem_DebugFree(void *ctx, void *p)
 {
+    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
     uchar *q = (uchar *)p - 2*SST;  /* address returned from malloc */
     size_t nbytes;
 
     if (p == NULL)
         return;
-    _PyObject_DebugCheckAddressApi(api, p);
+    _PyMem_DebugCheckAddress(api->api_id, p);
     nbytes = read_size_t(q);
     nbytes += 4*SST;
     if (nbytes > 0)
         memset(q, DEADBYTE, nbytes);
-    PyObject_Free(q);
+    api->alloc.free(api->alloc.ctx, q);
 }
 
-void *
-_PyObject_DebugReallocApi(char api, void *p, size_t nbytes)
+static void *
+_PyMem_DebugRealloc(void *ctx, void *p, size_t nbytes)
 {
-    uchar *q = (uchar *)p;
+    debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
+    uchar *q = (uchar *)p, *oldq;
     uchar *tail;
     size_t total;       /* nbytes + 4*SST */
     size_t original_nbytes;
     int i;
 
     if (p == NULL)
-        return _PyObject_DebugMallocApi(api, nbytes);
+        return _PyMem_DebugMalloc(ctx, nbytes);
 
-    _PyObject_DebugCheckAddressApi(api, p);
+    _PyMem_DebugCheckAddress(api->api_id, p);
     bumpserialno();
     original_nbytes = read_size_t(q - 2*SST);
     total = nbytes + 4*SST;
-    if (total < nbytes)
-        /* overflow:  can't represent total as a size_t */
+    if (nbytes > PY_SSIZE_T_MAX - 4*SST)
+        /* overflow:  can't represent total as a Py_ssize_t */
         return NULL;
 
-    if (nbytes < original_nbytes) {
-        /* shrinking:  mark old extra memory dead */
-        memset(q + nbytes, DEADBYTE, original_nbytes - nbytes + 2*SST);
-    }
-
     /* Resize and add decorations. We may get a new pointer here, in which
      * case we didn't get the chance to mark the old memory with DEADBYTE,
      * but we live with that.
      */
-    q = (uchar *)PyObject_Realloc(q - 2*SST, total);
+    oldq = q;
+    q = (uchar *)api->alloc.realloc(api->alloc.ctx, q - 2*SST, total);
     if (q == NULL)
         return NULL;
 
+    if (q == oldq && nbytes < original_nbytes) {
+        /* shrinking:  mark old extra memory dead */
+        memset(q + nbytes, DEADBYTE, original_nbytes - nbytes);
+    }
+
     write_size_t(q, nbytes);
-    assert(q[SST] == (uchar)api);
+    assert(q[SST] == (uchar)api->api_id);
     for (i = 1; i < SST; ++i)
         assert(q[SST + i] == FORBIDDENBYTE);
     q += 2*SST;
+
     tail = q + nbytes;
     memset(tail, FORBIDDENBYTE, SST);
     write_size_t(tail + SST, serialno);
@@ -1561,8 +1859,8 @@ _PyObject_DebugReallocApi(char api, void *p, size_t nbytes)
  * and call Py_FatalError to kill the program.
  * The API id, is also checked.
  */
- void
-_PyObject_DebugCheckAddressApi(char api, const void *p)
+static void
+_PyMem_DebugCheckAddress(char api, const void *p)
 {
     const uchar *q = (const uchar *)p;
     char msgbuf[64];
@@ -1614,7 +1912,7 @@ error:
 }
 
 /* Display info to stderr about the memory block at p. */
-void
+static void
 _PyObject_DebugDumpAddress(const void *p)
 {
     const uchar *q = (const uchar *)p;
diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c
index f2030c3..c4ba715 100644
--- a/Objects/rangeobject.c
+++ b/Objects/rangeobject.c
@@ -139,7 +139,11 @@ PyDoc_STRVAR(range_doc,
 "range(stop) -> range object\n\
 range(start, stop[, step]) -> range object\n\
 \n\
-Return a virtual sequence of numbers from start to stop by step.");
+Return an object that produces a sequence of integers from start (inclusive)\n\
+to stop (exclusive) by step.  range(i, j) produces i, i+1, i+2, ..., j-1.\n\
+start defaults to 0, and stop is omitted!  range(4) produces 0, 1, 2, 3.\n\
+These are exactly the valid indices for a list of 4 elements.\n\
+When step is given, it specifies the increment (or decrement).");
 
 static void
 range_dealloc(rangeobject *r)
@@ -190,8 +194,11 @@ compute_range_length(PyObject *start, PyObject *stop, PyObject *step)
     }
 
     /* if (lo >= hi), return length of 0. */
-    if (PyObject_RichCompareBool(lo, hi, Py_GE) == 1) {
+    cmp_result = PyObject_RichCompareBool(lo, hi, Py_GE);
+    if (cmp_result != 0) {
         Py_XDECREF(step);
+        if (cmp_result < 0)
+            return NULL;
         return PyLong_FromLong(0);
     }
 
@@ -318,195 +325,6 @@ range_item(rangeobject *r, Py_ssize_t i)
     return res;
 }
 
-/* Additional helpers, since the standard slice helpers
- * all clip to PY_SSIZE_T_MAX
- */
-
-/* Replace _PyEval_SliceIndex */
-static PyObject *
-compute_slice_element(PyObject *obj)
-{
-    PyObject *result = NULL;
-    if (obj != NULL) {
-        if (PyIndex_Check(obj)) {
-            result = PyNumber_Index(obj);
-        }
-        else {
-            PyErr_SetString(PyExc_TypeError,
-                            "slice indices must be integers or "
-                            "None or have an __index__ method");
-        }
-    }
-    return result;
-}
-
-/* Replace PySlice_GetIndicesEx
- *   Result indicates whether or not the slice is empty
- *    (-1 = error, 0 = empty slice, 1 = slice contains elements)
- */
-static int
-compute_slice_indices(rangeobject *r, PySliceObject *slice,
-                      PyObject **start, PyObject **stop, PyObject **step)
-{
-    int cmp_result, has_elements;
-    Py_ssize_t clamped_step = 0;
-    PyObject *zero = NULL, *one = NULL, *neg_one = NULL, *candidate = NULL;
-    PyObject *tmp_start = NULL, *tmp_stop = NULL, *tmp_step = NULL;
-    zero = PyLong_FromLong(0);
-    if (zero == NULL) goto Fail;
-    one = PyLong_FromLong(1);
-    if (one == NULL) goto Fail;
-    neg_one = PyLong_FromLong(-1);
-    if (neg_one == NULL) goto Fail;
-
-    /* Calculate step value */
-    if (slice->step == Py_None) {
-        clamped_step = 1;
-        tmp_step = one;
-        Py_INCREF(tmp_step);
-    } else {
-        if (!_PyEval_SliceIndex(slice->step, &clamped_step)) goto Fail;
-        if (clamped_step == 0) {
-            PyErr_SetString(PyExc_ValueError,
-                            "slice step cannot be zero");
-            goto Fail;
-        }
-        tmp_step = compute_slice_element(slice->step);
-        if (tmp_step == NULL) goto Fail;
-    }
-
-    /* Calculate start value */
-    if (slice->start == Py_None) {
-        if (clamped_step < 0) {
-            tmp_start = PyNumber_Subtract(r->length, one);
-            if (tmp_start == NULL) goto Fail;
-        } else {
-            tmp_start = zero;
-            Py_INCREF(tmp_start);
-        }
-    } else {
-        candidate = compute_slice_element(slice->start);
-        if (candidate == NULL) goto Fail;
-        cmp_result = PyObject_RichCompareBool(candidate, zero, Py_LT);
-        if (cmp_result == -1) goto Fail;
-        if (cmp_result) {
-            /* candidate < 0 */
-            tmp_start = PyNumber_Add(r->length, candidate);
-            if (tmp_start == NULL) goto Fail;
-            Py_CLEAR(candidate);
-        } else {
-            /* candidate >= 0 */
-            tmp_start = candidate;
-            candidate = NULL;
-        }
-        cmp_result = PyObject_RichCompareBool(tmp_start, zero, Py_LT);
-        if (cmp_result == -1) goto Fail;
-        if (cmp_result) {
-            /* tmp_start < 0 */
-            Py_CLEAR(tmp_start);
-            if (clamped_step < 0) {
-                tmp_start = neg_one;
-            } else {
-                tmp_start = zero;
-            }
-            Py_INCREF(tmp_start);
-        } else {
-            /* tmp_start >= 0 */
-            cmp_result = PyObject_RichCompareBool(tmp_start, r->length, Py_GE);
-            if (cmp_result == -1) goto Fail;
-            if (cmp_result) {
-                /* tmp_start >= r->length */
-                Py_CLEAR(tmp_start);
-                if (clamped_step < 0) {
-                    tmp_start = PyNumber_Subtract(r->length, one);
-                    if (tmp_start == NULL) goto Fail;
-                } else {
-                    tmp_start = r->length;
-                    Py_INCREF(tmp_start);
-                }
-            }
-        }
-    }
-
-    /* Calculate stop value */
-    if (slice->stop == Py_None) {
-        if (clamped_step < 0) {
-            tmp_stop = neg_one;
-        } else {
-            tmp_stop = r->length;
-        }
-        Py_INCREF(tmp_stop);
-    } else {
-        candidate = compute_slice_element(slice->stop);
-        if (candidate == NULL) goto Fail;
-        cmp_result = PyObject_RichCompareBool(candidate, zero, Py_LT);
-        if (cmp_result == -1) goto Fail;
-        if (cmp_result) {
-            /* candidate < 0 */
-            tmp_stop = PyNumber_Add(r->length, candidate);
-            if (tmp_stop == NULL) goto Fail;
-            Py_CLEAR(candidate);
-        } else {
-            /* candidate >= 0 */
-            tmp_stop = candidate;
-            candidate = NULL;
-        }
-        cmp_result = PyObject_RichCompareBool(tmp_stop, zero, Py_LT);
-        if (cmp_result == -1) goto Fail;
-        if (cmp_result) {
-            /* tmp_stop < 0 */
-            Py_CLEAR(tmp_stop);
-            if (clamped_step < 0) {
-                tmp_stop = neg_one;
-            } else {
-                tmp_stop = zero;
-            }
-            Py_INCREF(tmp_stop);
-        } else {
-            /* tmp_stop >= 0 */
-            cmp_result = PyObject_RichCompareBool(tmp_stop, r->length, Py_GE);
-            if (cmp_result == -1) goto Fail;
-            if (cmp_result) {
-                /* tmp_stop >= r->length */
-                Py_CLEAR(tmp_stop);
-                if (clamped_step < 0) {
-                    tmp_stop = PyNumber_Subtract(r->length, one);
-                    if (tmp_stop == NULL) goto Fail;
-                } else {
-                    tmp_stop = r->length;
-                    Py_INCREF(tmp_stop);
-                }
-            }
-        }
-    }
-
-    /* Check if the slice is empty or not */
-    if (clamped_step < 0) {
-        has_elements = PyObject_RichCompareBool(tmp_start, tmp_stop, Py_GT);
-    } else {
-        has_elements = PyObject_RichCompareBool(tmp_start, tmp_stop, Py_LT);
-    }
-    if (has_elements == -1) goto Fail;
-
-    *start = tmp_start;
-    *stop = tmp_stop;
-    *step = tmp_step;
-    Py_DECREF(neg_one);
-    Py_DECREF(one);
-    Py_DECREF(zero);
-    return has_elements;
-
-  Fail:
-    Py_XDECREF(tmp_start);
-    Py_XDECREF(tmp_stop);
-    Py_XDECREF(tmp_step);
-    Py_XDECREF(candidate);
-    Py_XDECREF(neg_one);
-    Py_XDECREF(one);
-    Py_XDECREF(zero);
-    return -1;
-}
-
 static PyObject *
 compute_slice(rangeobject *r, PyObject *_slice)
 {
@@ -514,10 +332,11 @@ compute_slice(rangeobject *r, PyObject *_slice)
     rangeobject *result;
     PyObject *start = NULL, *stop = NULL, *step = NULL;
     PyObject *substart = NULL, *substop = NULL, *substep = NULL;
-    int has_elements;
+    int error;
 
-    has_elements = compute_slice_indices(r, slice, &start, &stop, &step);
-    if (has_elements == -1) return NULL;
+    error = _PySlice_GetLongIndices(slice, r->length, &start, &stop, &step);
+    if (error == -1)
+        return NULL;
 
     substep = PyNumber_Multiply(r->step, step);
     if (substep == NULL) goto fail;
@@ -527,13 +346,8 @@ compute_slice(rangeobject *r, PyObject *_slice)
     if (substart == NULL) goto fail;
     Py_CLEAR(start);
 
-    if (has_elements) {
-        substop = compute_item(r, stop);
-        if (substop == NULL) goto fail;
-    } else {
-        substop = substart;
-        Py_INCREF(substop);
-    }
+    substop = compute_item(r, stop);
+    if (substop == NULL) goto fail;
     Py_CLEAR(stop);
 
     result = make_range_object(Py_TYPE(r), substart, substop, substep);
diff --git a/Objects/setobject.c b/Objects/setobject.c
index fc17fa5..61f1d94 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -1,151 +1,115 @@
 
 /* set object implementation
+
    Written and maintained by Raymond D. Hettinger <python@rcn.com>
    Derived from Lib/sets.py and Objects/dictobject.c.
+
+   The basic lookup function used by all operations.
+   This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
+
+   The initial probe index is computed as hash mod the table size.
+   Subsequent probe indices are computed as explained in Objects/dictobject.c.
+
+   To improve cache locality, each probe inspects a series of consecutive
+   nearby entries before moving on to probes elsewhere in memory.  This leaves
+   us with a hybrid of linear probing and pseudo-random probing.  The linear
+   probing reduces the cost of hash collisions because consecutive memory
+   accesses tend to be much cheaper than scattered probes.  After LINEAR_PROBES
+   steps, we then jump to a new index perturbed by the upper bits of the hash
+   value.  This helps break up long chains of collisions.
+
+   All arithmetic on hash should ignore overflow.
+
+   Unlike the dictionary implementation, the lookkey functions can return
+   NULL if the rich comparison returns an error.
 */
 
 #include "Python.h"
 #include "structmember.h"
 #include "stringlib/eq.h"
 
-/* Set a key error with the specified argument, wrapping it in a
- * tuple automatically so that tuple keys are not unpacked as the
- * exception arguments. */
-static void
-set_key_error(PyObject *arg)
-{
-    PyObject *tup;
-    tup = PyTuple_Pack(1, arg);
-    if (!tup)
-        return; /* caller will expect error to be set anyway */
-    PyErr_SetObject(PyExc_KeyError, tup);
-    Py_DECREF(tup);
-}
-
-/* This must be >= 1. */
-#define PERTURB_SHIFT 5
-
 /* Object used as dummy key to fill deleted entries */
-static PyObject *dummy = NULL; /* Initialized by first call to make_new_set() */
+static PyObject _dummy_struct;
 
-#ifdef Py_REF_DEBUG
-PyObject *
-_PySet_Dummy(void)
-{
-    return dummy;
-}
-#endif
+#define dummy (&_dummy_struct)
 
-#define INIT_NONZERO_SET_SLOTS(so) do {                         \
-    (so)->table = (so)->smalltable;                             \
-    (so)->mask = PySet_MINSIZE - 1;                             \
-    (so)->hash = -1;                                            \
-    } while(0)
 
-#define EMPTY_TO_MINSIZE(so) do {                               \
-    memset((so)->smalltable, 0, sizeof((so)->smalltable));      \
-    (so)->used = (so)->fill = 0;                                \
-    INIT_NONZERO_SET_SLOTS(so);                                 \
-    } while(0)
+/* ======================================================================== */
+/* ======= Begin logic for probing the hash table ========================= */
 
-/* Reuse scheme to save calls to malloc, free, and memset */
-#ifndef PySet_MAXFREELIST
-#define PySet_MAXFREELIST 80
+/* Set this to zero to turn off linear probing */
+#ifndef LINEAR_PROBES
+#define LINEAR_PROBES 9
 #endif
-static PySetObject *free_list[PySet_MAXFREELIST];
-static int numfree = 0;
-
-
-/*
-The basic lookup function used by all operations.
-This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
-Open addressing is preferred over chaining since the link overhead for
-chaining would be substantial (100% with typical malloc overhead).
-
-The initial probe index is computed as hash mod the table size. Subsequent
-probe indices are computed as explained in Objects/dictobject.c.
-
-All arithmetic on hash should ignore overflow.
 
-Unlike the dictionary implementation, the lookkey functions can return
-NULL if the rich comparison returns an error.
-*/
+/* This must be >= 1 */
+#define PERTURB_SHIFT 5
 
 static setentry *
-set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash)
+set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
 {
-    register size_t i;  /* Unsigned for defined overflow behavior. */
-    register size_t perturb;
-    register setentry *freeslot;
-    register size_t mask = so->mask;
     setentry *table = so->table;
-    register setentry *entry;
-    register int cmp;
-    PyObject *startkey;
-
-    i = (size_t)hash & mask;
-    entry = &table[i];
-    if (entry->key == NULL || entry->key == key)
+    setentry *freeslot = NULL;
+    setentry *entry;
+    size_t perturb = hash;
+    size_t mask = so->mask;
+    size_t i = (size_t)hash; /* Unsigned for defined overflow behavior. */
+    size_t j;
+    int cmp;
+
+    entry = &table[i & mask];
+    if (entry->key == NULL)
         return entry;
 
-    if (entry->key == dummy)
-        freeslot = entry;
-    else {
-        if (entry->hash == hash) {
-            startkey = entry->key;
+    while (1) {
+        if (entry->key == key)
+            return entry;
+        if (entry->hash == hash && entry->key != dummy) {
+            PyObject *startkey = entry->key;
             Py_INCREF(startkey);
             cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
             Py_DECREF(startkey);
             if (cmp < 0)
                 return NULL;
-            if (table == so->table && entry->key == startkey) {
-                if (cmp > 0)
-                    return entry;
-            }
-            else {
-                /* The compare did major nasty stuff to the
-                 * set:  start over.
-                 */
+            if (table != so->table || entry->key != startkey)
                 return set_lookkey(so, key, hash);
-            }
+            if (cmp > 0)
+                return entry;
         }
-        freeslot = NULL;
-    }
+        if (entry->key == dummy && freeslot == NULL)
+            freeslot = entry;
 
-    /* In the loop, key == dummy is by far (factor of 100s) the
-       least likely outcome, so test for that last. */
-    for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
-        i = (i << 2) + i + perturb + 1;
-        entry = &table[i & mask];
-        if (entry->key == NULL) {
-            if (freeslot != NULL)
-                entry = freeslot;
-            break;
-        }
-        if (entry->key == key)
-            break;
-        if (entry->hash == hash && entry->key != dummy) {
-            startkey = entry->key;
-            Py_INCREF(startkey);
-            cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
-            Py_DECREF(startkey);
-            if (cmp < 0)
-                return NULL;
-            if (table == so->table && entry->key == startkey) {
+        for (j = 1 ; j <= LINEAR_PROBES ; j++) {
+            entry = &table[(i + j) & mask];
+            if (entry->key == NULL)
+                goto found_null;
+            if (entry->key == key)
+                return entry;
+            if (entry->hash == hash && entry->key != dummy) {
+                PyObject *startkey = entry->key;
+                Py_INCREF(startkey);
+                cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
+                Py_DECREF(startkey);
+                if (cmp < 0)
+                    return NULL;
+                if (table != so->table || entry->key != startkey)
+                    return set_lookkey(so, key, hash);
                 if (cmp > 0)
-                    break;
-            }
-            else {
-                /* The compare did major nasty stuff to the
-                 * set:  start over.
-                 */
-                return set_lookkey(so, key, hash);
+                    return entry;
             }
+            if (entry->key == dummy && freeslot == NULL)
+                freeslot = entry;
         }
-        else if (entry->key == dummy && freeslot == NULL)
-            freeslot = entry;
+
+        perturb >>= PERTURB_SHIFT;
+        i = i * 5 + 1 + perturb;
+
+        entry = &table[i & mask];
+        if (entry->key == NULL)
+            goto found_null;
     }
-    return entry;
+  found_null:
+    return freeslot == NULL ? entry : freeslot;
 }
 
 /*
@@ -154,14 +118,15 @@ set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash)
  * see if the comparison altered the table.
  */
 static setentry *
-set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash)
+set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash)
 {
-    register size_t i;  /* Unsigned for defined overflow behavior. */
-    register size_t perturb;
-    register setentry *freeslot;
-    register size_t mask = so->mask;
     setentry *table = so->table;
-    register setentry *entry;
+    setentry *freeslot = NULL;
+    setentry *entry;
+    size_t perturb = hash;
+    size_t mask = so->mask;
+    size_t i = (size_t)hash;
+    size_t j;
 
     /* Make sure this function doesn't have to handle non-unicode keys,
        including subclasses of str; e.g., one reason to subclass
@@ -171,46 +136,94 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash)
         so->lookup = set_lookkey;
         return set_lookkey(so, key, hash);
     }
-    i = (size_t)hash & mask;
-    entry = &table[i];
-    if (entry->key == NULL || entry->key == key)
+
+    entry = &table[i & mask];
+    if (entry->key == NULL)
         return entry;
-    if (entry->key == dummy)
-        freeslot = entry;
-    else {
-        if (entry->hash == hash && unicode_eq(entry->key, key))
-            return entry;
-        freeslot = NULL;
-    }
 
-    /* In the loop, key == dummy is by far (factor of 100s) the
-       least likely outcome, so test for that last. */
-    for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
-        i = (i << 2) + i + perturb + 1;
-        entry = &table[i & mask];
-        if (entry->key == NULL)
-            return freeslot == NULL ? entry : freeslot;
+    while (1) {
         if (entry->key == key
             || (entry->hash == hash
-            && entry->key != dummy
-            && unicode_eq(entry->key, key)))
+                && entry->key != dummy
+                && unicode_eq(entry->key, key)))
             return entry;
         if (entry->key == dummy && freeslot == NULL)
             freeslot = entry;
+
+        for (j = 1 ; j <= LINEAR_PROBES ; j++) {
+            entry = &table[(i + j) & mask];
+            if (entry->key == NULL)
+                goto found_null;
+            if (entry->key == key
+                || (entry->hash == hash
+                    && entry->key != dummy
+                    && unicode_eq(entry->key, key)))
+                return entry;
+            if (entry->key == dummy && freeslot == NULL)
+                freeslot = entry;
+        }
+
+        perturb >>= PERTURB_SHIFT;
+        i = i * 5 + 1 + perturb;
+
+        entry = &table[i & mask];
+        if (entry->key == NULL)
+            goto found_null;
     }
-    assert(0);          /* NOT REACHED */
-    return 0;
+  found_null:
+    return freeslot == NULL ? entry : freeslot;
+}
+
+/*
+Internal routine used by set_table_resize() to insert an item which is
+known to be absent from the set.  This routine also assumes that
+the set contains no deleted entries.  Besides the performance benefit,
+using set_insert_key() in set_table_resize() is dangerous (SF bug #1456209).
+Note that no refcounts are changed by this routine; if needed, the caller
+is responsible for incref'ing `key`.
+*/
+static void
+set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash)
+{
+    setentry *table = so->table;
+    setentry *entry;
+    size_t perturb = hash;
+    size_t mask = (size_t)so->mask;
+    size_t i = (size_t)hash;
+    size_t j;
+
+    while (1) {
+        entry = &table[i & mask];
+        if (entry->key == NULL)
+            goto found_null;
+        for (j = 1 ; j <= LINEAR_PROBES ; j++) {
+            entry = &table[(i + j) & mask];
+            if (entry->key == NULL)
+                goto found_null;
+        }
+        perturb >>= PERTURB_SHIFT;
+        i = i * 5 + 1 + perturb;
+    }
+  found_null:
+    entry->key = key;
+    entry->hash = hash;
+    so->fill++;
+    so->used++;
 }
 
+/* ======== End logic for probing the hash table ========================== */
+/* ======================================================================== */
+
+
 /*
 Internal routine to insert a new key into the table.
 Used by the public insert routine.
 Eats a reference to key.
 */
 static int
-set_insert_key(register PySetObject *so, PyObject *key, Py_hash_t hash)
+set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
 {
-    register setentry *entry;
+    setentry *entry;
 
     assert(so->lookup != NULL);
     entry = so->lookup(so, key, hash);
@@ -227,7 +240,6 @@ set_insert_key(register PySetObject *so, PyObject *key, Py_hash_t hash)
         entry->key = key;
         entry->hash = hash;
         so->used++;
-        Py_DECREF(dummy);
     } else {
         /* ACTIVE */
         Py_DECREF(key);
@@ -236,35 +248,6 @@ set_insert_key(register PySetObject *so, PyObject *key, Py_hash_t hash)
 }
 
 /*
-Internal routine used by set_table_resize() to insert an item which is
-known to be absent from the set.  This routine also assumes that
-the set contains no deleted entries.  Besides the performance benefit,
-using set_insert_clean() in set_table_resize() is dangerous (SF bug #1456209).
-Note that no refcounts are changed by this routine; if needed, the caller
-is responsible for incref'ing `key`.
-*/
-static void
-set_insert_clean(register PySetObject *so, PyObject *key, Py_hash_t hash)
-{
-    register size_t i;
-    register size_t perturb;
-    register size_t mask = (size_t)so->mask;
-    setentry *table = so->table;
-    register setentry *entry;
-
-    i = (size_t)hash & mask;
-    entry = &table[i];
-    for (perturb = hash; entry->key != NULL; perturb >>= PERTURB_SHIFT) {
-        i = (i << 2) + i + perturb + 1;
-        entry = &table[i & mask];
-    }
-    so->fill++;
-    entry->key = key;
-    entry->hash = hash;
-    so->used++;
-}
-
-/*
 Restructure the table by allocating a new table and reinserting all
 keys again.  When entries have been deleted, the new table may
 actually be smaller than the old one.
@@ -327,23 +310,14 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)
     so->table = newtable;
     so->mask = newsize - 1;
     memset(newtable, 0, sizeof(setentry) * newsize);
+    i = so->used;
     so->used = 0;
-    i = so->fill;
     so->fill = 0;
 
     /* Copy the data over; this is refcount-neutral for active entries;
        dummy entries aren't copied over, of course */
     for (entry = oldtable; i > 0; entry++) {
-        if (entry->key == NULL) {
-            /* UNUSED */
-            ;
-        } else if (entry->key == dummy) {
-            /* DUMMY */
-            --i;
-            assert(entry->key == dummy);
-            Py_DECREF(entry->key);
-        } else {
-            /* ACTIVE */
+        if (entry->key != NULL && entry->key != dummy) {
             --i;
             set_insert_clean(so, entry->key, entry->hash);
         }
@@ -357,9 +331,9 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)
 /* CAUTION: set_add_key/entry() must guarantee it won't resize the table */
 
 static int
-set_add_entry(register PySetObject *so, setentry *entry)
+set_add_entry(PySetObject *so, setentry *entry)
 {
-    register Py_ssize_t n_used;
+    Py_ssize_t n_used;
     PyObject *key = entry->key;
     Py_hash_t hash = entry->hash;
 
@@ -376,10 +350,10 @@ set_add_entry(register PySetObject *so, setentry *entry)
 }
 
 static int
-set_add_key(register PySetObject *so, PyObject *key)
+set_add_key(PySetObject *so, PyObject *key)
 {
-    register Py_hash_t hash;
-    register Py_ssize_t n_used;
+    Py_hash_t hash;
+    Py_ssize_t n_used;
 
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
@@ -404,7 +378,7 @@ set_add_key(register PySetObject *so, PyObject *key)
 
 static int
 set_discard_entry(PySetObject *so, setentry *oldentry)
-{       register setentry *entry;
+{       setentry *entry;
     PyObject *old_key;
 
     entry = (so->lookup)(so, oldentry->key, oldentry->hash);
@@ -413,7 +387,6 @@ set_discard_entry(PySetObject *so, setentry *oldentry)
     if (entry->key == NULL  ||  entry->key == dummy)
         return DISCARD_NOTFOUND;
     old_key = entry->key;
-    Py_INCREF(dummy);
     entry->key = dummy;
     so->used--;
     Py_DECREF(old_key);
@@ -423,8 +396,8 @@ set_discard_entry(PySetObject *so, setentry *oldentry)
 static int
 set_discard_key(PySetObject *so, PyObject *key)
 {
-    register Py_hash_t hash;
-    register setentry *entry;
+    Py_hash_t hash;
+    setentry *entry;
     PyObject *old_key;
 
     assert (PyAnySet_Check(so));
@@ -441,13 +414,23 @@ set_discard_key(PySetObject *so, PyObject *key)
     if (entry->key == NULL  ||  entry->key == dummy)
         return DISCARD_NOTFOUND;
     old_key = entry->key;
-    Py_INCREF(dummy);
     entry->key = dummy;
     so->used--;
     Py_DECREF(old_key);
     return DISCARD_FOUND;
 }
 
+static void
+set_empty_to_minsize(PySetObject *so)
+{
+    memset(so->smalltable, 0, sizeof(so->smalltable));
+    so->fill = 0;
+    so->used = 0;
+    so->mask = PySet_MINSIZE - 1;
+    so->table = so->smalltable;
+    so->hash = -1;
+}
+
 static int
 set_clear_internal(PySetObject *so)
 {
@@ -455,14 +438,13 @@ set_clear_internal(PySetObject *so)
     int table_is_malloced;
     Py_ssize_t fill;
     setentry small_copy[PySet_MINSIZE];
-#ifdef Py_DEBUG
-    Py_ssize_t i, n;
-    assert (PyAnySet_Check(so));
 
-    n = so->mask + 1;
-    i = 0;
+#ifdef Py_DEBUG
+    Py_ssize_t i = 0;
+    Py_ssize_t n = so->mask + 1;
 #endif
 
+    assert (PyAnySet_Check(so));
     table = so->table;
     assert(table != NULL);
     table_is_malloced = table != so->smalltable;
@@ -475,7 +457,7 @@ set_clear_internal(PySetObject *so)
      */
     fill = so->fill;
     if (table_is_malloced)
-        EMPTY_TO_MINSIZE(so);
+        set_empty_to_minsize(so);
 
     else if (fill > 0) {
         /* It's a small table with something that needs to be cleared.
@@ -484,7 +466,7 @@ set_clear_internal(PySetObject *so)
          */
         memcpy(small_copy, table, sizeof(small_copy));
         table = small_copy;
-        EMPTY_TO_MINSIZE(so);
+        set_empty_to_minsize(so);
     }
     /* else it's a small table that's already empty */
 
@@ -499,7 +481,8 @@ set_clear_internal(PySetObject *so)
 #endif
         if (entry->key) {
             --fill;
-            Py_DECREF(entry->key);
+            if (entry->key != dummy)
+                Py_DECREF(entry->key);
         }
 #ifdef Py_DEBUG
         else
@@ -530,7 +513,7 @@ set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
 {
     Py_ssize_t i;
     Py_ssize_t mask;
-    register setentry *table;
+    setentry *table;
 
     assert (PyAnySet_Check(so));
     i = *pos_ptr;
@@ -550,7 +533,7 @@ set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
 static void
 set_dealloc(PySetObject *so)
 {
-    register setentry *entry;
+    setentry *entry;
     Py_ssize_t fill = so->fill;
     PyObject_GC_UnTrack(so);
     Py_TRASHCAN_SAFE_BEGIN(so)
@@ -560,15 +543,13 @@ set_dealloc(PySetObject *so)
     for (entry = so->table; fill > 0; entry++) {
         if (entry->key) {
             --fill;
-            Py_DECREF(entry->key);
+            if (entry->key != dummy)
+                Py_DECREF(entry->key);
         }
     }
     if (so->table != so->smalltable)
         PyMem_DEL(so->table);
-    if (numfree < PySet_MAXFREELIST && PyAnySet_CheckExact(so))
-        free_list[numfree++] = so;
-    else
-        Py_TYPE(so)->tp_free(so);
+    Py_TYPE(so)->tp_free(so);
     Py_TRASHCAN_SAFE_END(so)
 }
 
@@ -629,8 +610,8 @@ set_merge(PySetObject *so, PyObject *otherset)
     PySetObject *other;
     PyObject *key;
     Py_hash_t hash;
-    register Py_ssize_t i;
-    register setentry *entry;
+    Py_ssize_t i;
+    setentry *entry;
 
     assert (PyAnySet_Check(so));
     assert (PyAnySet_Check(otherset));
@@ -698,8 +679,8 @@ set_contains_entry(PySetObject *so, setentry *entry)
 static PyObject *
 set_pop(PySetObject *so)
 {
-    register Py_ssize_t i = 0;
-    register setentry *entry;
+    Py_ssize_t i = 0;
+    setentry *entry;
     PyObject *key;
 
     assert (PyAnySet_Check(so));
@@ -731,7 +712,6 @@ set_pop(PySetObject *so)
         }
     }
     key = entry->key;
-    Py_INCREF(dummy);
     entry->key = dummy;
     so->used--;
     so->table[0].hash = i + 1;  /* next place to start */
@@ -755,6 +735,17 @@ set_traverse(PySetObject *so, visitproc visit, void *arg)
 static Py_hash_t
 frozenset_hash(PyObject *self)
 {
+    /* Most of the constants in this hash algorithm are randomly chosen
+       large primes with "interesting bit patterns" and that passed
+       tests for good collision statistics on a variety of problematic
+       datasets such as:
+
+          ps = []
+          for r in range(21):
+              ps += itertools.combinations(range(20), r)
+          num_distinct_hashes = len({hash(frozenset(s)) for s in ps})
+
+    */
     PySetObject *so = (PySetObject *)self;
     Py_uhash_t h, hash = 1927868237UL;
     setentry *entry;
@@ -771,8 +762,10 @@ frozenset_hash(PyObject *self)
            hashes so that many distinct combinations collapse to only
            a handful of distinct hash values. */
         h = entry->hash;
-        hash ^= (h ^ (h << 16) ^ 89869747UL)  * 3644798167UL;
+        hash ^= ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
     }
+    /* Make the final result spread-out in a different pattern
+       than the algorithm for tuples or other python objects. */
     hash = hash * 69069U + 907133923UL;
     if (hash == -1)
         hash = 590923713UL;
@@ -866,8 +859,8 @@ static PyMethodDef setiter_methods[] = {
 static PyObject *setiter_iternext(setiterobject *si)
 {
     PyObject *key;
-    register Py_ssize_t i, mask;
-    register setentry *entry;
+    Py_ssize_t i, mask;
+    setentry *entry;
     PySetObject *so = si->si_set;
 
     if (so == NULL)
@@ -1021,33 +1014,19 @@ PyDoc_STRVAR(update_doc,
 static PyObject *
 make_new_set(PyTypeObject *type, PyObject *iterable)
 {
-    register PySetObject *so = NULL;
-
-    if (dummy == NULL) { /* Auto-initialize dummy */
-        dummy = PyUnicode_FromString("<dummy key>");
-        if (dummy == NULL)
-            return NULL;
-    }
+    PySetObject *so = NULL;
 
     /* create PySetObject structure */
-    if (numfree &&
-        (type == &PySet_Type  ||  type == &PyFrozenSet_Type)) {
-        so = free_list[--numfree];
-        assert (so != NULL && PyAnySet_CheckExact(so));
-        Py_TYPE(so) = type;
-        _Py_NewReference((PyObject *)so);
-        EMPTY_TO_MINSIZE(so);
-        PyObject_GC_Track(so);
-    } else {
-        so = (PySetObject *)type->tp_alloc(type, 0);
-        if (so == NULL)
-            return NULL;
-        /* tp_alloc has already zeroed the structure */
-        assert(so->table == NULL && so->fill == 0 && so->used == 0);
-        INIT_NONZERO_SET_SLOTS(so);
-    }
+    so = (PySetObject *)type->tp_alloc(type, 0);
+    if (so == NULL)
+        return NULL;
 
+    so->fill = 0;
+    so->used = 0;
+    so->mask = PySet_MINSIZE - 1;
+    so->table = so->smalltable;
     so->lookup = set_lookkey_unicode;
+    so->hash = -1;
     so->weakreflist = NULL;
 
     if (iterable != NULL) {
@@ -1110,35 +1089,15 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 int
 PySet_ClearFreeList(void)
 {
-    int freelist_size = numfree;
-    PySetObject *so;
-
-    while (numfree) {
-        numfree--;
-        so = free_list[numfree];
-        PyObject_GC_Del(so);
-    }
-    return freelist_size;
+    return 0;
 }
 
 void
 PySet_Fini(void)
 {
-    PySet_ClearFreeList();
-    Py_CLEAR(dummy);
     Py_CLEAR(emptyfrozenset);
 }
 
-/* Print summary info about the state of the optimized allocator */
-void
-_PySet_DebugMallocStats(FILE *out)
-{
-    _PyDebugAllocatorStats(out,
-                           "free PySetObject",
-                           numfree, sizeof(PySetObject));
-}
-
-
 static PyObject *
 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
@@ -1607,9 +1566,15 @@ set_difference(PySetObject *so, PyObject *other)
     if (PyDict_CheckExact(other)) {
         while (set_next(so, &pos, &entry)) {
             setentry entrycopy;
+            int rv;
             entrycopy.hash = entry->hash;
             entrycopy.key = entry->key;
-            if (!_PyDict_Contains(other, entry->key, entry->hash)) {
+            rv = _PyDict_Contains(other, entry->key, entry->hash);
+            if (rv < 0) {
+                Py_DECREF(result);
+                return NULL;
+            }
+            if (!rv) {
                 if (set_add_entry((PySetObject *)result, &entrycopy) == -1) {
                     Py_DECREF(result);
                     return NULL;
@@ -1845,7 +1810,8 @@ PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
 static PyObject *
 set_richcompare(PySetObject *v, PyObject *w, int op)
 {
-    PyObject *r1, *r2;
+    PyObject *r1;
+    int r2;
 
     if(!PyAnySet_Check(w))
         Py_RETURN_NOTIMPLEMENTED;
@@ -1863,9 +1829,11 @@ set_richcompare(PySetObject *v, PyObject *w, int op)
         r1 = set_richcompare(v, w, Py_EQ);
         if (r1 == NULL)
             return NULL;
-        r2 = PyBool_FromLong(PyObject_Not(r1));
+        r2 = PyObject_IsTrue(r1);
         Py_DECREF(r1);
-        return r2;
+        if (r2 < 0)
+            return NULL;
+        return PyBool_FromLong(!r2);
     case Py_LE:
         return set_issubset(v, w);
     case Py_GE:
@@ -1949,7 +1917,7 @@ set_remove(PySetObject *so, PyObject *key)
     }
 
     if (rv == DISCARD_NOTFOUND) {
-        set_key_error(key);
+        _PyErr_SetKeyError(key);
         return NULL;
     }
     Py_RETURN_NONE;
@@ -2393,6 +2361,9 @@ _PySet_Update(PyObject *set, PyObject *iterable)
     return set_update_internal((PySetObject *)set, iterable);
 }
 
+/* Exported for the gdb plugin's benefit. */
+PyObject *_PySet_Dummy = dummy;
+
 #ifdef Py_DEBUG
 
 /* Test code to be called with any three element set.
@@ -2411,7 +2382,7 @@ test_c_api(PySetObject *so)
     Py_ssize_t count;
     char *s;
     Py_ssize_t i;
-    PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x;
+    PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL;
     PyObject *ob = (PyObject *)so;
     Py_hash_t hash;
     PyObject *str;
@@ -2534,3 +2505,46 @@ test_c_api(PySetObject *so)
 #undef assertRaises
 
 #endif
+
+/***** Dummy Struct  *************************************************/
+
+static PyObject *
+dummy_repr(PyObject *op)
+{
+    return PyUnicode_FromString("<dummy key>");
+}
+
+static void
+dummy_dealloc(PyObject* ignore)
+{
+    Py_FatalError("deallocating <dummy key>");
+}
+
+static PyTypeObject _PySetDummy_Type = {
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    "<dummy key> type",
+    0,
+    0,
+    dummy_dealloc,      /*tp_dealloc*/ /*never called*/
+    0,                  /*tp_print*/
+    0,                  /*tp_getattr*/
+    0,                  /*tp_setattr*/
+    0,                  /*tp_reserved*/
+    dummy_repr,         /*tp_repr*/
+    0,                  /*tp_as_number*/
+    0,                  /*tp_as_sequence*/
+    0,                  /*tp_as_mapping*/
+    0,                  /*tp_hash */
+    0,                  /*tp_call */
+    0,                  /*tp_str */
+    0,                  /*tp_getattro */
+    0,                  /*tp_setattro */
+    0,                  /*tp_as_buffer */
+    Py_TPFLAGS_DEFAULT, /*tp_flags */
+};
+
+static PyObject _dummy_struct = {
+  _PyObject_EXTRA_INIT
+  2, &_PySetDummy_Type
+};
+
diff --git a/Objects/sliceobject.c b/Objects/sliceobject.c
index 1593335..1049523 100644
--- a/Objects/sliceobject.c
+++ b/Objects/sliceobject.c
@@ -33,6 +33,17 @@ ellipsis_repr(PyObject *op)
     return PyUnicode_FromString("Ellipsis");
 }
 
+static PyObject *
+ellipsis_reduce(PyObject *op)
+{
+    return PyUnicode_FromString("Ellipsis");
+}
+
+static PyMethodDef ellipsis_methods[] = {
+    {"__reduce__", (PyCFunction)ellipsis_reduce, METH_NOARGS, NULL},
+    {NULL, NULL}
+};
+
 PyTypeObject PyEllipsis_Type = {
     PyVarObject_HEAD_INIT(&PyType_Type, 0)
     "ellipsis",                         /* tp_name */
@@ -61,7 +72,7 @@ PyTypeObject PyEllipsis_Type = {
     0,                                  /* tp_weaklistoffset */
     0,                                  /* tp_iter */
     0,                                  /* tp_iternext */
-    0,                                  /* tp_methods */
+    ellipsis_methods,                   /* tp_methods */
     0,                                  /* tp_members */
     0,                                  /* tp_getset */
     0,                                  /* tp_base */
@@ -299,23 +310,198 @@ static PyMemberDef slice_members[] = {
     {0}
 };
 
+/* Helper function to convert a slice argument to a PyLong, and raise TypeError
+   with a suitable message on failure. */
+
 static PyObject*
-slice_indices(PySliceObject* self, PyObject* len)
+evaluate_slice_index(PyObject *v)
+{
+    if (PyIndex_Check(v)) {
+        return PyNumber_Index(v);
+    }
+    else {
+        PyErr_SetString(PyExc_TypeError,
+                        "slice indices must be integers or "
+                        "None or have an __index__ method");
+        return NULL;
+    }
+}
+
+/* Compute slice indices given a slice and length.  Return -1 on failure.  Used
+   by slice.indices and rangeobject slicing.  Assumes that `length` is a
+   nonnegative instance of PyLong. */
+
+int
+_PySlice_GetLongIndices(PySliceObject *self, PyObject *length,
+                        PyObject **start_ptr, PyObject **stop_ptr,
+                        PyObject **step_ptr)
 {
-    Py_ssize_t ilen, start, stop, step, slicelength;
+    PyObject *start=NULL, *stop=NULL, *step=NULL;
+    PyObject *upper=NULL, *lower=NULL;
+    int step_is_negative, cmp_result;
+
+    /* Convert step to an integer; raise for zero step. */
+    if (self->step == Py_None) {
+        step = PyLong_FromLong(1L);
+        if (step == NULL)
+            goto error;
+        step_is_negative = 0;
+    }
+    else {
+        int step_sign;
+        step = evaluate_slice_index(self->step);
+        if (step == NULL)
+            goto error;
+        step_sign = _PyLong_Sign(step);
+        if (step_sign == 0) {
+            PyErr_SetString(PyExc_ValueError,
+                            "slice step cannot be zero");
+            goto error;
+        }
+        step_is_negative = step_sign < 0;
+    }
 
-    ilen = PyNumber_AsSsize_t(len, PyExc_OverflowError);
+    /* Find lower and upper bounds for start and stop. */
+    if (step_is_negative) {
+        lower = PyLong_FromLong(-1L);
+        if (lower == NULL)
+            goto error;
 
-    if (ilen == -1 && PyErr_Occurred()) {
-        return NULL;
+        upper = PyNumber_Add(length, lower);
+        if (upper == NULL)
+            goto error;
     }
+    else {
+        lower = PyLong_FromLong(0L);
+        if (lower == NULL)
+            goto error;
 
-    if (PySlice_GetIndicesEx((PyObject*)self, ilen, &start, &stop,
-                             &step, &slicelength) < 0) {
+        upper = length;
+        Py_INCREF(upper);
+    }
+
+    /* Compute start. */
+    if (self->start == Py_None) {
+        start = step_is_negative ? upper : lower;
+        Py_INCREF(start);
+    }
+    else {
+        start = evaluate_slice_index(self->start);
+        if (start == NULL)
+            goto error;
+
+        if (_PyLong_Sign(start) < 0) {
+            /* start += length */
+            PyObject *tmp = PyNumber_Add(start, length);
+            Py_DECREF(start);
+            start = tmp;
+            if (start == NULL)
+                goto error;
+
+            cmp_result = PyObject_RichCompareBool(start, lower, Py_LT);
+            if (cmp_result < 0)
+                goto error;
+            if (cmp_result) {
+                Py_INCREF(lower);
+                Py_DECREF(start);
+                start = lower;
+            }
+        }
+        else {
+            cmp_result = PyObject_RichCompareBool(start, upper, Py_GT);
+            if (cmp_result < 0)
+                goto error;
+            if (cmp_result) {
+                Py_INCREF(upper);
+                Py_DECREF(start);
+                start = upper;
+            }
+        }
+    }
+
+    /* Compute stop. */
+    if (self->stop == Py_None) {
+        stop = step_is_negative ? lower : upper;
+        Py_INCREF(stop);
+    }
+    else {
+        stop = evaluate_slice_index(self->stop);
+        if (stop == NULL)
+            goto error;
+
+        if (_PyLong_Sign(stop) < 0) {
+            /* stop += length */
+            PyObject *tmp = PyNumber_Add(stop, length);
+            Py_DECREF(stop);
+            stop = tmp;
+            if (stop == NULL)
+                goto error;
+
+            cmp_result = PyObject_RichCompareBool(stop, lower, Py_LT);
+            if (cmp_result < 0)
+                goto error;
+            if (cmp_result) {
+                Py_INCREF(lower);
+                Py_DECREF(stop);
+                stop = lower;
+            }
+        }
+        else {
+            cmp_result = PyObject_RichCompareBool(stop, upper, Py_GT);
+            if (cmp_result < 0)
+                goto error;
+            if (cmp_result) {
+                Py_INCREF(upper);
+                Py_DECREF(stop);
+                stop = upper;
+            }
+        }
+    }
+
+    *start_ptr = start;
+    *stop_ptr = stop;
+    *step_ptr = step;
+    Py_DECREF(upper);
+    Py_DECREF(lower);
+    return 0;
+
+  error:
+    *start_ptr = *stop_ptr = *step_ptr = NULL;
+    Py_XDECREF(start);
+    Py_XDECREF(stop);
+    Py_XDECREF(step);
+    Py_XDECREF(upper);
+    Py_XDECREF(lower);
+    return -1;
+}
+
+/* Implementation of slice.indices. */
+
+static PyObject*
+slice_indices(PySliceObject* self, PyObject* len)
+{
+    PyObject *start, *stop, *step;
+    PyObject *length;
+    int error;
+
+    /* Convert length to an integer if necessary; raise for negative length. */
+    length = PyNumber_Index(len);
+    if (length == NULL)
+        return NULL;
+
+    if (_PyLong_Sign(length) < 0) {
+        PyErr_SetString(PyExc_ValueError,
+                        "length should not be negative");
+        Py_DECREF(length);
         return NULL;
     }
 
-    return Py_BuildValue("(nnn)", start, stop, step);
+    error = _PySlice_GetLongIndices(self, length, &start, &stop, &step);
+    Py_DECREF(length);
+    if (error == -1)
+        return NULL;
+    else
+        return Py_BuildValue("(NNN)", start, stop, step);
 }
 
 PyDoc_STRVAR(slice_indices_doc,
diff --git a/Objects/stringlib/README.txt b/Objects/stringlib/README.txt
index ab506d6..8ff6ad8 100644
--- a/Objects/stringlib/README.txt
+++ b/Objects/stringlib/README.txt
@@ -1,4 +1,4 @@
-bits shared by the stringobject and unicodeobject implementations (and
+bits shared by the bytesobject and unicodeobject implementations (and
 possibly other modules, in a not too distant future).
 
 the stuff in here is included into relevant places; see the individual
diff --git a/Objects/stringlib/asciilib.h b/Objects/stringlib/asciilib.h
index f62813d..d0fc18d 100644
--- a/Objects/stringlib/asciilib.h
+++ b/Objects/stringlib/asciilib.h
@@ -19,7 +19,6 @@
 #define STRINGLIB_STR            PyUnicode_1BYTE_DATA
 #define STRINGLIB_LEN            PyUnicode_GET_LENGTH
 #define STRINGLIB_NEW(STR,LEN)   _PyUnicode_FromASCII((char*)(STR),(LEN))
-#define STRINGLIB_RESIZE         not_supported
 #define STRINGLIB_CHECK          PyUnicode_Check
 #define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact
 
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
index f353367..ee1bf2b 100644
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -38,8 +38,8 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
             */
             if (_Py_IS_ALIGNED(s, SIZEOF_LONG)) {
                 /* Help register allocation */
-                register const char *_s = s;
-                register STRINGLIB_CHAR *_p = p;
+                const char *_s = s;
+                STRINGLIB_CHAR *_p = p;
                 while (_s < aligned_end) {
                     /* Read a whole long at a time (either 4 or 8 bytes),
                        and do a fast unrolled copy if it only contains ASCII
@@ -47,7 +47,7 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
                     unsigned long value = *(unsigned long *) _s;
                     if (value & ASCII_CHAR_MASK)
                         break;
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+#if PY_LITTLE_ENDIAN
                     _p[0] = (STRINGLIB_CHAR)(value & 0xFFu);
                     _p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
                     _p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
@@ -486,7 +486,7 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
     const unsigned char *q = *inptr;
     STRINGLIB_CHAR *p = dest + *outpos;
     /* Offsets from q for retrieving byte pairs in the right order. */
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+#if PY_LITTLE_ENDIAN
     int ihi = !!native_ordering, ilo = !native_ordering;
 #else
     int ihi = !native_ordering, ilo = !!native_ordering;
@@ -499,7 +499,7 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
            reads are more expensive, better to defer to another iteration. */
         if (_Py_IS_ALIGNED(q, SIZEOF_LONG)) {
             /* Fast path for runs of in-range non-surrogate chars. */
-            register const unsigned char *_q = q;
+            const unsigned char *_q = q;
             while (_q < aligned_end) {
                 unsigned long block = * (unsigned long *) _q;
                 if (native_ordering) {
@@ -517,7 +517,7 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
                     block = SWAB(block);
 #endif
                 }
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+#if PY_LITTLE_ENDIAN
 # if SIZEOF_LONG == 4
                 p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
                 p[1] = (STRINGLIB_CHAR)(block >> 16);
@@ -596,26 +596,30 @@ IllegalSurrogate:
 #undef SWAB
 
 
-Py_LOCAL_INLINE(void)
-STRINGLIB(utf16_encode)(unsigned short *out,
-                        const STRINGLIB_CHAR *in,
+#if STRINGLIB_MAX_CHAR >= 0x80
+Py_LOCAL_INLINE(Py_ssize_t)
+STRINGLIB(utf16_encode)(const STRINGLIB_CHAR *in,
                         Py_ssize_t len,
+                        unsigned short **outptr,
                         int native_ordering)
 {
+    unsigned short *out = *outptr;
     const STRINGLIB_CHAR *end = in + len;
 #if STRINGLIB_SIZEOF_CHAR == 1
-# define SWAB2(CH)  ((CH) << 8)
-#else
-# define SWAB2(CH)  (((CH) << 8) | ((CH) >> 8))
-#endif
-#if STRINGLIB_MAX_CHAR < 0x10000
     if (native_ordering) {
-# if STRINGLIB_SIZEOF_CHAR == 2
-        Py_MEMCPY(out, in, 2 * len);
-# else
-        _PyUnicode_CONVERT_BYTES(STRINGLIB_CHAR, unsigned short, in, end, out);
-# endif
+        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
+        while (in < unrolled_end) {
+            out[0] = in[0];
+            out[1] = in[1];
+            out[2] = in[2];
+            out[3] = in[3];
+            in += 4; out += 4;
+        }
+        while (in < end) {
+            *out++ = *in++;
+        }
     } else {
+# define SWAB2(CH)  ((CH) << 8) /* high byte is zero */
         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
         while (in < unrolled_end) {
             out[0] = SWAB2(in[0]);
@@ -625,37 +629,95 @@ STRINGLIB(utf16_encode)(unsigned short *out,
             in += 4; out += 4;
         }
         while (in < end) {
-            *out++ = SWAB2(*in);
-            ++in;
+            Py_UCS4 ch = *in++;
+            *out++ = SWAB2((Py_UCS2)ch);
         }
+#undef SWAB2
     }
+    *outptr = out;
+    return len;
 #else
     if (native_ordering) {
+#if STRINGLIB_MAX_CHAR < 0x10000
+        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
+        while (in < unrolled_end) {
+            /* check if any character is a surrogate character */
+            if (((in[0] ^ 0xd800) &
+                 (in[1] ^ 0xd800) &
+                 (in[2] ^ 0xd800) &
+                 (in[3] ^ 0xd800) & 0xf800) == 0)
+                break;
+            out[0] = in[0];
+            out[1] = in[1];
+            out[2] = in[2];
+            out[3] = in[3];
+            in += 4; out += 4;
+        }
+#endif
         while (in < end) {
-            Py_UCS4 ch = *in++;
-            if (ch < 0x10000)
+            Py_UCS4 ch;
+            ch = *in++;
+            if (ch < 0xd800)
                 *out++ = ch;
-            else {
+            else if (ch < 0xe000)
+                /* reject surrogate characters (U+D800-U+DFFF) */
+                goto fail;
+#if STRINGLIB_MAX_CHAR >= 0x10000
+            else if (ch >= 0x10000) {
                 out[0] = Py_UNICODE_HIGH_SURROGATE(ch);
                 out[1] = Py_UNICODE_LOW_SURROGATE(ch);
                 out += 2;
             }
+#endif
+            else
+                *out++ = ch;
         }
     } else {
+#define SWAB2(CH)  (((CH) << 8) | ((CH) >> 8))
+#if STRINGLIB_MAX_CHAR < 0x10000
+        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
+        while (in < unrolled_end) {
+            /* check if any character is a surrogate character */
+            if (((in[0] ^ 0xd800) &
+                 (in[1] ^ 0xd800) &
+                 (in[2] ^ 0xd800) &
+                 (in[3] ^ 0xd800) & 0xf800) == 0)
+                break;
+            out[0] = SWAB2(in[0]);
+            out[1] = SWAB2(in[1]);
+            out[2] = SWAB2(in[2]);
+            out[3] = SWAB2(in[3]);
+            in += 4; out += 4;
+        }
+#endif
         while (in < end) {
             Py_UCS4 ch = *in++;
-            if (ch < 0x10000)
+            if (ch < 0xd800)
                 *out++ = SWAB2((Py_UCS2)ch);
-            else {
+            else if (ch < 0xe000)
+                /* reject surrogate characters (U+D800-U+DFFF) */
+                goto fail;
+#if STRINGLIB_MAX_CHAR >= 0x10000
+            else if (ch >= 0x10000) {
                 Py_UCS2 ch1 = Py_UNICODE_HIGH_SURROGATE(ch);
                 Py_UCS2 ch2 = Py_UNICODE_LOW_SURROGATE(ch);
                 out[0] = SWAB2(ch1);
                 out[1] = SWAB2(ch2);
                 out += 2;
             }
+#endif
+            else
+                *out++ = SWAB2((Py_UCS2)ch);
         }
+#undef SWAB2
     }
+    *outptr = out;
+    return len;
+  fail:
+    *outptr = out;
+    return len - (end - in + 1);
 #endif
-#undef SWAB2
 }
+#endif
+
 #endif /* STRINGLIB_IS_UNICODE */
diff --git a/Objects/stringlib/eq.h b/Objects/stringlib/eq.h
index 3e5f510..f8fd384 100644
--- a/Objects/stringlib/eq.h
+++ b/Objects/stringlib/eq.h
@@ -6,8 +6,8 @@
 Py_LOCAL_INLINE(int)
 unicode_eq(PyObject *aa, PyObject *bb)
 {
-    register PyUnicodeObject *a = (PyUnicodeObject *)aa;
-    register PyUnicodeObject *b = (PyUnicodeObject *)bb;
+    PyUnicodeObject *a = (PyUnicodeObject *)aa;
+    PyUnicodeObject *b = (PyUnicodeObject *)bb;
 
     if (PyUnicode_READY(a) == -1 || PyUnicode_READY(b) == -1) {
         assert(0 && "unicode_eq ready fail");
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
index 55ac77d..cd7cac4 100644
--- a/Objects/stringlib/fastsearch.h
+++ b/Objects/stringlib/fastsearch.h
@@ -142,6 +142,8 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
     mask = 0;
 
     if (mode != FAST_RSEARCH) {
+        const STRINGLIB_CHAR *ss = s + m - 1;
+        const STRINGLIB_CHAR *pp = p + m - 1;
 
         /* create compressed boyer-moore delta 1 table */
 
@@ -156,7 +158,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
 
         for (i = 0; i <= w; i++) {
             /* note: using mlast in the skip path slows things down on x86 */
-            if (s[i+m-1] == p[m-1]) {
+            if (ss[i] == pp[0]) {
                 /* candidate match */
                 for (j = 0; j < mlast; j++)
                     if (s[i+j] != p[j])
@@ -172,13 +174,13 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
                     continue;
                 }
                 /* miss: check if next character is part of pattern */
-                if (!STRINGLIB_BLOOM(mask, s[i+m]))
+                if (!STRINGLIB_BLOOM(mask, ss[i+1]))
                     i = i + m;
                 else
                     i = i + skip;
             } else {
                 /* skip: check if next character is part of pattern */
-                if (!STRINGLIB_BLOOM(mask, s[i+m]))
+                if (!STRINGLIB_BLOOM(mask, ss[i+1]))
                     i = i + m;
             }
         }
diff --git a/Objects/stringlib/find_max_char.h b/Objects/stringlib/find_max_char.h
index 06559c8..eb3fe88 100644
--- a/Objects/stringlib/find_max_char.h
+++ b/Objects/stringlib/find_max_char.h
@@ -24,7 +24,7 @@ STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
     while (p < end) {
         if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
             /* Help register allocation */
-            register const unsigned char *_p = p;
+            const unsigned char *_p = p;
             while (_p < aligned_end) {
                 unsigned long value = *(unsigned long *) _p;
                 if (value & UCS1_ASCII_CHAR_MASK)
@@ -66,7 +66,7 @@ STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
 #else
 #error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4)
 #endif
-    register Py_UCS4 mask;
+    Py_UCS4 mask;
     Py_ssize_t n = end - begin;
     const STRINGLIB_CHAR *p = begin;
     const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4);
diff --git a/Objects/stringlib/join.h b/Objects/stringlib/join.h
new file mode 100644
index 0000000..cbf81be
--- /dev/null
+++ b/Objects/stringlib/join.h
@@ -0,0 +1,140 @@
+/* stringlib: bytes joining implementation (sep.join(iterable)) */
+
+#if STRINGLIB_SIZEOF_CHAR != 1
+#error join.h only compatible with byte-wise strings
+#endif
+
+Py_LOCAL_INLINE(PyObject *)
+STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
+{
+    char *sepstr = STRINGLIB_STR(sep);
+    const Py_ssize_t seplen = STRINGLIB_LEN(sep);
+    PyObject *res = NULL;  /* result; NULL is what error paths return */
+    char *p;  /* write cursor into res */
+    Py_ssize_t seqlen = 0;
+    Py_ssize_t sz = 0;  /* total size of the result */
+    Py_ssize_t i, nbufs;  /* nbufs: number of buffers[] entries in use */
+    PyObject *seq, *item;
+    Py_buffer *buffers = NULL;
+#define NB_STATIC_BUFFERS 10
+    Py_buffer static_buffers[NB_STATIC_BUFFERS];
+
+    seq = PySequence_Fast(iterable, "can only join an iterable");
+    if (seq == NULL) {
+        return NULL;
+    }
+
+    seqlen = PySequence_Fast_GET_SIZE(seq);
+    if (seqlen == 0) {  /* nothing to join: empty result */
+        Py_DECREF(seq);
+        return STRINGLIB_NEW(NULL, 0);
+    }
+#ifndef STRINGLIB_MUTABLE
+    if (seqlen == 1) {  /* a lone exact-type item is returned as-is */
+        item = PySequence_Fast_GET_ITEM(seq, 0);
+        if (STRINGLIB_CHECK_EXACT(item)) {
+            Py_INCREF(item);
+            Py_DECREF(seq);
+            return item;
+        }
+    }
+#endif
+    if (seqlen > NB_STATIC_BUFFERS) {  /* too many for the local array */
+        buffers = PyMem_NEW(Py_buffer, seqlen);
+        if (buffers == NULL) {
+            Py_DECREF(seq);
+            PyErr_NoMemory();
+            return NULL;
+        }
+    }
+    else {
+        buffers = static_buffers;
+    }
+
+    /* General case.  Pre-pass: acquire a buffer for every item (failing
+     * with TypeError if one is not bytes-like) and accumulate the total
+     * result size in sz, checking for Py_ssize_t overflow as we go.
+     */
+    for (i = 0, nbufs = 0; i < seqlen; i++) {
+        Py_ssize_t itemlen;
+        item = PySequence_Fast_GET_ITEM(seq, i);
+        if (PyBytes_CheckExact(item)) {
+            /* Fast path: fill in obj, buf and len by hand. */
+            Py_INCREF(item);
+            buffers[i].obj = item;
+            buffers[i].buf = PyBytes_AS_STRING(item);
+            buffers[i].len = PyBytes_GET_SIZE(item);
+        }
+        else if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
+            PyErr_Format(PyExc_TypeError,
+                         "sequence item %zd: expected a bytes-like object, "
+                         "%.80s found",
+                         i, Py_TYPE(item)->tp_name);
+            goto error;
+        }
+        nbufs = i + 1;  /* buffers[0..i] must be released from now on */
+        itemlen = buffers[i].len;
+        if (itemlen > PY_SSIZE_T_MAX - sz) {  /* sz += itemlen would overflow */
+            PyErr_SetString(PyExc_OverflowError,
+                            "join() result is too long");
+            goto error;
+        }
+        sz += itemlen;
+        if (i != 0) {  /* one separator before every item but the first */
+            if (seplen > PY_SSIZE_T_MAX - sz) {
+                PyErr_SetString(PyExc_OverflowError,
+                                "join() result is too long");
+                goto error;
+            }
+            sz += seplen;
+        }
+        if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
+            PyErr_SetString(PyExc_RuntimeError,
+                            "sequence changed size during iteration");
+            goto error;
+        }
+    }
+
+    /* Allocate result space. */
+    res = STRINGLIB_NEW(NULL, sz);
+    if (res == NULL)
+        goto error;
+
+    /* Concatenate everything, with sep between consecutive items. */
+    p = STRINGLIB_STR(res);
+    if (!seplen) {
+        /* fast path: empty separator, copy the items back to back */
+        for (i = 0; i < nbufs; i++) {
+            Py_ssize_t n = buffers[i].len;
+            char *q = buffers[i].buf;
+            Py_MEMCPY(p, q, n);
+            p += n;
+        }
+        goto done;
+    }
+    for (i = 0; i < nbufs; i++) {
+        Py_ssize_t n;
+        char *q;
+        if (i) {
+            Py_MEMCPY(p, sepstr, seplen);
+            p += seplen;
+        }
+        n = buffers[i].len;
+        q = buffers[i].buf;
+        Py_MEMCPY(p, q, n);
+        p += n;
+    }
+    goto done;  /* success: skip the res = NULL below */
+
+error:
+    res = NULL;
+done:
+    Py_DECREF(seq);
+    for (i = 0; i < nbufs; i++)
+        PyBuffer_Release(&buffers[i]);
+    if (buffers != static_buffers)  /* only the PyMem_NEW array is freed */
+        PyMem_FREE(buffers);
+    return res;
+}
+
+#undef NB_STATIC_BUFFERS
diff --git a/Objects/stringlib/partition.h b/Objects/stringlib/partition.h
index 40cb512..ed32a6f 100644
--- a/Objects/stringlib/partition.h
+++ b/Objects/stringlib/partition.h
@@ -29,6 +29,11 @@ STRINGLIB(partition)(PyObject* str_obj,
         PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, str_len));
         PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
         PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(NULL, 0));
+
+        if (PyErr_Occurred()) {
+            Py_DECREF(out);
+            return NULL;
+        }
 #else
         Py_INCREF(str_obj);
         PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);
@@ -79,6 +84,11 @@ STRINGLIB(rpartition)(PyObject* str_obj,
         PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(NULL, 0));
         PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
         PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str, str_len));
+
+        if (PyErr_Occurred()) {
+            Py_DECREF(out);
+            return NULL;
+        }
 #else
         Py_INCREF(STRINGLIB_EMPTY);
         PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY);
diff --git a/Objects/stringlib/replace.h b/Objects/stringlib/replace.h
new file mode 100644
index 0000000..ef318ed
--- /dev/null
+++ b/Objects/stringlib/replace.h
@@ -0,0 +1,53 @@
+/* stringlib: replace implementation (in-place single character replace) */
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#error must include "stringlib/fastsearch.h" before including this module
+#endif
+
+Py_LOCAL_INLINE(void)
+STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end,
+                                 Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)
+{
+    *s = u2;  /* NOTE(review): assumes s already points at a u1 -- confirm */
+    while (--maxcount && ++s != end) {  /* count includes the write above */
+        /* Find the next character to be replaced.
+
+           If it occurs often, it is faster to scan for it using an inline
+           loop.  If it occurs seldom, it is faster to scan for it using a
+           function call; the overhead of the function call is amortized
+           across the many characters that call covers.  We start with an
+           inline loop and use a heuristic to determine whether to fall back
+           to a function call. */
+        if (*s != u1) {
+            int attempts = 10;  /* inline probes before the fallback search */
+            /* scan inline; each exit leaves s on a u1 or returns at end */
+            while (1) {
+                if (++s == end)
+                    return;
+                if (*s == u1)
+                    break;
+                if (!--attempts) {
+                    /* if u1 was not found for attempts iterations,
+                       use FASTSEARCH() or memchr() */
+#if STRINGLIB_SIZEOF_CHAR == 1
+                    s++;  /* *s was just compared above */
+                    s = memchr(s, u1, end - s);
+                    if (s == NULL)
+                        return;
+#else
+                    Py_ssize_t i;
+                    STRINGLIB_CHAR ch1 = (STRINGLIB_CHAR) u1;
+                    s++;  /* *s was just compared above */
+                    i = FASTSEARCH(s, end - s, &ch1, 1, 0, FAST_SEARCH);
+                    if (i < 0)
+                        return;
+                    s += i;
+#endif
+                    /* s is on a match: leave the scan loop */
+                    break;
+                }
+            }
+        }
+        *s = u2;
+    }
+}
diff --git a/Objects/stringlib/split.h b/Objects/stringlib/split.h
index 947dd28..31f77a7 100644
--- a/Objects/stringlib/split.h
+++ b/Objects/stringlib/split.h
@@ -345,8 +345,8 @@ STRINGLIB(splitlines)(PyObject* str_obj,
        and the appends only done when the prealloc buffer is full.
        That's too much work for little gain.*/
 
-    register Py_ssize_t i;
-    register Py_ssize_t j;
+    Py_ssize_t i;
+    Py_ssize_t j;
     PyObject *list = PyList_New(0);
     PyObject *sub;
 
diff --git a/Objects/stringlib/stringdefs.h b/Objects/stringlib/stringdefs.h
index 7bb91a7..ce27f3e 100644
--- a/Objects/stringlib/stringdefs.h
+++ b/Objects/stringlib/stringdefs.h
@@ -21,7 +21,6 @@
 #define STRINGLIB_STR            PyBytes_AS_STRING
 #define STRINGLIB_LEN            PyBytes_GET_SIZE
 #define STRINGLIB_NEW            PyBytes_FromStringAndSize
-#define STRINGLIB_RESIZE         _PyBytes_Resize
 #define STRINGLIB_CHECK          PyBytes_Check
 #define STRINGLIB_CHECK_EXACT    PyBytes_CheckExact
 #define STRINGLIB_TOSTR          PyObject_Str
diff --git a/Objects/stringlib/transmogrify.h b/Objects/stringlib/transmogrify.h
index cbd7144..cae6ea1 100644
--- a/Objects/stringlib/transmogrify.h
+++ b/Objects/stringlib/transmogrify.h
@@ -5,21 +5,23 @@
    shared code in bytes_methods.c to cut down on duplicate code bloat.  */
 
 PyDoc_STRVAR(expandtabs__doc__,
-"B.expandtabs([tabsize]) -> copy of B\n\
+"B.expandtabs(tabsize=8) -> copy of B\n\
 \n\
 Return a copy of B where all tab characters are expanded using spaces.\n\
 If tabsize is not given, a tab size of 8 characters is assumed.");
 
 static PyObject*
-stringlib_expandtabs(PyObject *self, PyObject *args)
+stringlib_expandtabs(PyObject *self, PyObject *args, PyObject *kwds)
 {
     const char *e, *p;
     char *q;
     Py_ssize_t i, j;
     PyObject *u;
+    static char *kwlist[] = {"tabsize", 0};
     int tabsize = 8;
 
-    if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:expandtabs",
+                                     kwlist, &tabsize))
         return NULL;
 
     /* First pass: determine size of output string */
diff --git a/Objects/stringlib/ucs1lib.h b/Objects/stringlib/ucs1lib.h
index e8c6fcb..ce1eb57 100644
--- a/Objects/stringlib/ucs1lib.h
+++ b/Objects/stringlib/ucs1lib.h
@@ -19,7 +19,6 @@
 #define STRINGLIB_STR            PyUnicode_1BYTE_DATA
 #define STRINGLIB_LEN            PyUnicode_GET_LENGTH
 #define STRINGLIB_NEW            _PyUnicode_FromUCS1
-#define STRINGLIB_RESIZE         not_supported
 #define STRINGLIB_CHECK          PyUnicode_Check
 #define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact
 
diff --git a/Objects/stringlib/ucs2lib.h b/Objects/stringlib/ucs2lib.h
index 45e5729..f900cb6 100644
--- a/Objects/stringlib/ucs2lib.h
+++ b/Objects/stringlib/ucs2lib.h
@@ -19,7 +19,6 @@
 #define STRINGLIB_STR            PyUnicode_2BYTE_DATA
 #define STRINGLIB_LEN            PyUnicode_GET_LENGTH
 #define STRINGLIB_NEW            _PyUnicode_FromUCS2
-#define STRINGLIB_RESIZE         not_supported
 #define STRINGLIB_CHECK          PyUnicode_Check
 #define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact
 
diff --git a/Objects/stringlib/ucs4lib.h b/Objects/stringlib/ucs4lib.h
index 647a27e..86a480f 100644
--- a/Objects/stringlib/ucs4lib.h
+++ b/Objects/stringlib/ucs4lib.h
@@ -19,7 +19,6 @@
 #define STRINGLIB_STR            PyUnicode_4BYTE_DATA
 #define STRINGLIB_LEN            PyUnicode_GET_LENGTH
 #define STRINGLIB_NEW            _PyUnicode_FromUCS4
-#define STRINGLIB_RESIZE         not_supported
 #define STRINGLIB_CHECK          PyUnicode_Check
 #define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact
 
diff --git a/Objects/stringlib/undef.h b/Objects/stringlib/undef.h
index 03117ec..f9d3f1d 100644
--- a/Objects/stringlib/undef.h
+++ b/Objects/stringlib/undef.h
@@ -6,7 +6,6 @@
 #undef  STRINGLIB_STR
 #undef  STRINGLIB_LEN
 #undef  STRINGLIB_NEW
-#undef  STRINGLIB_RESIZE
 #undef  _Py_InsertThousandsGrouping
 #undef STRINGLIB_IS_UNICODE
 
diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h
index d4719a5..aec221a 100644
--- a/Objects/stringlib/unicode_format.h
+++ b/Objects/stringlib/unicode_format.h
@@ -543,7 +543,7 @@ done:
 
 static int
 parse_field(SubString *str, SubString *field_name, SubString *format_spec,
-            Py_UCS4 *conversion)
+            int *format_spec_needs_expanding, Py_UCS4 *conversion)
 {
     /* Note this function works if the field name is zero length,
        which is good.  Zero length field names are handled later, in
@@ -561,6 +561,15 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec,
     field_name->start = str->start;
     while (str->start < str->end) {
         switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
+        case '{':
+            PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
+            return 0;
+        case '[':
+            for (; str->start < str->end; str->start++)
+                if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
+                    break;
+            continue;
+        case '}':
         case ':':
         case '!':
             break;
@@ -570,41 +579,62 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec,
         break;
     }
 
+    field_name->end = str->start - 1;
     if (c == '!' || c == ':') {
+        Py_ssize_t count;
         /* we have a format specifier and/or a conversion */
         /* don't include the last character */
-        field_name->end = str->start-1;
-
-        /* the format specifier is the rest of the string */
-        format_spec->str = str->str;
-        format_spec->start = str->start;
-        format_spec->end = str->end;
 
         /* see if there's a conversion specifier */
         if (c == '!') {
             /* there must be another character present */
-            if (format_spec->start >= format_spec->end) {
+            if (str->start >= str->end) {
                 PyErr_SetString(PyExc_ValueError,
-                                "end of format while looking for conversion "
+                                "end of string while looking for conversion "
                                 "specifier");
                 return 0;
             }
-            *conversion = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
+            *conversion = PyUnicode_READ_CHAR(str->str, str->start++);
 
-            /* if there is another character, it must be a colon */
-            if (format_spec->start < format_spec->end) {
-                c = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
+            if (str->start < str->end) {
+                c = PyUnicode_READ_CHAR(str->str, str->start++);
+                if (c == '}')
+                    return 1;
                 if (c != ':') {
                     PyErr_SetString(PyExc_ValueError,
-                                    "expected ':' after format specifier");
+                                    "expected ':' after conversion specifier");
                     return 0;
                 }
             }
         }
+        format_spec->str = str->str;
+        format_spec->start = str->start;
+        count = 1;
+        while (str->start < str->end) {
+            switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
+            case '{':
+                *format_spec_needs_expanding = 1;
+                count++;
+                break;
+            case '}':
+                count--;
+                if (count == 0) {
+                    format_spec->end = str->start - 1;
+                    return 1;
+                }
+                break;
+            default:
+                break;
+            }
+        }
+
+        PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
+        return 0;
+    }
+    else if (c != '}') {
+        PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
+        return 0;
     }
-    else
-        /* end of string, there's no format_spec or conversion */
-        field_name->end = str->start;
 
     return 1;
 }
@@ -638,10 +668,9 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
                     SubString *format_spec, Py_UCS4 *conversion,
                     int *format_spec_needs_expanding)
 {
-    int at_end, hit_format_spec;
+    int at_end;
     Py_UCS4 c = 0;
     Py_ssize_t start;
-    int count;
     Py_ssize_t len;
     int markup_follows = 0;
 
@@ -713,52 +742,12 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
     if (!markup_follows)
         return 2;
 
-    /* this is markup, find the end of the string by counting nested
-       braces.  note that this prohibits escaped braces, so that
-       format_specs cannot have braces in them. */
+    /* this is markup; parse the field */
     *field_present = 1;
-    count = 1;
-
-    start = self->str.start;
-
-    /* we know we can't have a zero length string, so don't worry
-       about that case */
-    hit_format_spec = 0;
-    while (self->str.start < self->str.end) {
-        switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
-        case ':':
-            if (!hit_format_spec) {
-                count = 1;
-                hit_format_spec = 1;
-            }
-            break;
-        case '{':
-            /* the format spec needs to be recursively expanded.
-               this is an optimization, and not strictly needed */
-            if (hit_format_spec)
-                *format_spec_needs_expanding = 1;
-            count++;
-            break;
-        case '}':
-            count--;
-            if (count <= 0) {
-                /* we're done.  parse and get out */
-                SubString s;
-
-                SubString_init(&s, self->str.str, start, self->str.start - 1);
-                if (parse_field(&s, field_name, format_spec, conversion) == 0)
-                    return 0;
-
-                /* success */
-                return 2;
-            }
-            break;
-        }
-    }
-
-    /* end of string while searching for matching '}' */
-    PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
-    return 0;
+    if (!parse_field(&self->str, field_name, format_spec,
+                     format_spec_needs_expanding, conversion))
+        return 0;
+    return 2;
 }
 
 
@@ -877,25 +866,19 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
     SubString literal;
     SubString field_name;
     SubString format_spec;
-    Py_UCS4 conversion, maxchar;
-    Py_ssize_t sublen;
-    int err;
+    Py_UCS4 conversion;
 
     MarkupIterator_init(&iter, input->str, input->start, input->end);
     while ((result = MarkupIterator_next(&iter, &literal, &field_present,
                                          &field_name, &format_spec,
                                          &conversion,
                                          &format_spec_needs_expanding)) == 2) {
-        sublen = literal.end - literal.start;
-        if (sublen) {
-            maxchar = _PyUnicode_FindMaxChar(literal.str,
-                                             literal.start, literal.end);
-            err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar);
-            if (err == -1)
+        if (literal.end != literal.start) {
+            if (!field_present && iter.str.start == iter.str.end)
+                writer->overallocate = 0;
+            if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
+                                                literal.start, literal.end) < 0)
                 return 0;
-            _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
-                                          literal.str, literal.start, sublen);
-            writer->pos += sublen;
         }
 
         if (field_present) {
@@ -920,7 +903,6 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
              int recursion_depth, AutoNumber *auto_number)
 {
     _PyUnicodeWriter writer;
-    Py_ssize_t minlen;
 
     /* check the recursion level */
     if (recursion_depth <= 0) {
@@ -929,8 +911,9 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
         return NULL;
     }
 
-    minlen = PyUnicode_GET_LENGTH(input->str) + 100;
-    _PyUnicodeWriter_Init(&writer, minlen);
+    _PyUnicodeWriter_Init(&writer);
+    writer.overallocate = 1;
+    writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
 
     if (!do_markup(input, args, kwargs, &writer, recursion_depth,
                    auto_number)) {
diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h
index f16f21e..3db5629 100644
--- a/Objects/stringlib/unicodedefs.h
+++ b/Objects/stringlib/unicodedefs.h
@@ -21,17 +21,11 @@
 #define STRINGLIB_STR            PyUnicode_AS_UNICODE
 #define STRINGLIB_LEN            PyUnicode_GET_SIZE
 #define STRINGLIB_NEW            PyUnicode_FromUnicode
-#define STRINGLIB_RESIZE         PyUnicode_Resize
 #define STRINGLIB_CHECK          PyUnicode_Check
 #define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact
 
-#if PY_VERSION_HEX < 0x03000000
-#define STRINGLIB_TOSTR          PyObject_Unicode
-#define STRINGLIB_TOASCII        PyObject_Repr
-#else
 #define STRINGLIB_TOSTR          PyObject_Str
 #define STRINGLIB_TOASCII        PyObject_ASCII
-#endif
 
 #define STRINGLIB_WANT_CONTAINS_OBJ 1
 
diff --git a/Objects/structseq.c b/Objects/structseq.c
index c3b9a72..664344b 100644
--- a/Objects/structseq.c
+++ b/Objects/structseq.c
@@ -11,17 +11,20 @@ static char unnamed_fields_key[] = "n_unnamed_fields";
 /* Fields with this name have only a field index, not a field name.
    They are only allowed for indices < n_visible_fields. */
 char *PyStructSequence_UnnamedField = "unnamed field";
+_Py_IDENTIFIER(n_sequence_fields);
+_Py_IDENTIFIER(n_fields);
+_Py_IDENTIFIER(n_unnamed_fields);
 
 #define VISIBLE_SIZE(op) Py_SIZE(op)
 #define VISIBLE_SIZE_TP(tp) PyLong_AsLong( \
-                      PyDict_GetItemString((tp)->tp_dict, visible_length_key))
+                      _PyDict_GetItemId((tp)->tp_dict, &PyId_n_sequence_fields))
 
 #define REAL_SIZE_TP(tp) PyLong_AsLong( \
-                      PyDict_GetItemString((tp)->tp_dict, real_length_key))
+                      _PyDict_GetItemId((tp)->tp_dict, &PyId_n_fields))
 #define REAL_SIZE(op) REAL_SIZE_TP(Py_TYPE(op))
 
 #define UNNAMED_FIELDS_TP(tp) PyLong_AsLong( \
-                      PyDict_GetItemString((tp)->tp_dict, unnamed_fields_key))
+                      _PyDict_GetItemId((tp)->tp_dict, &PyId_n_unnamed_fields))
 #define UNNAMED_FIELDS(op) UNNAMED_FIELDS_TP(Py_TYPE(op))
 
 
@@ -59,7 +62,7 @@ static void
 structseq_dealloc(PyStructSequence *obj)
 {
     Py_ssize_t i, size;
-    
+
     size = REAL_SIZE(obj);
     for (i = 0; i < size; ++i) {
         Py_XDECREF(obj->ob_item[i]);
@@ -230,8 +233,8 @@ structseq_repr(PyStructSequence *obj)
 static PyObject *
 structseq_reduce(PyStructSequence* self)
 {
-    PyObject* tup;
-    PyObject* dict;
+    PyObject* tup = NULL;
+    PyObject* dict = NULL;
     PyObject* result;
     Py_ssize_t n_fields, n_visible_fields, n_unnamed_fields;
     int i;
@@ -240,15 +243,12 @@ structseq_reduce(PyStructSequence* self)
     n_visible_fields = VISIBLE_SIZE(self);
     n_unnamed_fields = UNNAMED_FIELDS(self);
     tup = PyTuple_New(n_visible_fields);
-    if (!tup) {
-        return NULL;
-    }
+    if (!tup)
+        goto error;
 
     dict = PyDict_New();
-    if (!dict) {
-        Py_DECREF(tup);
-        return NULL;
-    }
+    if (!dict)
+        goto error;
 
     for (i = 0; i < n_visible_fields; i++) {
         Py_INCREF(self->ob_item[i]);
@@ -257,8 +257,8 @@ structseq_reduce(PyStructSequence* self)
 
     for (; i < n_fields; i++) {
         char *n = Py_TYPE(self)->tp_members[i-n_unnamed_fields].name;
-        PyDict_SetItemString(dict, n,
-                             self->ob_item[i]);
+        if (PyDict_SetItemString(dict, n, self->ob_item[i]) < 0)
+            goto error;
     }
 
     result = Py_BuildValue("(O(OO))", Py_TYPE(self), tup, dict);
@@ -267,6 +267,11 @@ structseq_reduce(PyStructSequence* self)
     Py_DECREF(dict);
 
     return result;
+
+error:
+    Py_XDECREF(tup);
+    Py_XDECREF(dict);
+    return NULL;
 }
 
 static PyMethodDef structseq_methods[] = {
@@ -315,12 +320,13 @@ static PyTypeObject _struct_sequence_template = {
     structseq_new,                              /* tp_new */
 };
 
-void
-PyStructSequence_InitType(PyTypeObject *type, PyStructSequence_Desc *desc)
+int
+PyStructSequence_InitType2(PyTypeObject *type, PyStructSequence_Desc *desc)
 {
     PyObject *dict;
     PyMemberDef* members;
     int n_members, n_unnamed_members, i, k;
+    PyObject *v;
 
 #ifdef Py_TRACE_REFS
     /* if the type object was chained, unchain it first
@@ -342,8 +348,10 @@ PyStructSequence_InitType(PyTypeObject *type, PyStructSequence_Desc *desc)
     type->tp_doc = desc->doc;
 
     members = PyMem_NEW(PyMemberDef, n_members-n_unnamed_members+1);
-    if (members == NULL)
-        return;
+    if (members == NULL) {
+        PyErr_NoMemory();
+        return -1;
+    }
 
     for (i = k = 0; i < n_members; ++i) {
         if (desc->fields[i].name == PyStructSequence_UnnamedField)
@@ -361,30 +369,56 @@ PyStructSequence_InitType(PyTypeObject *type, PyStructSequence_Desc *desc)
     type->tp_members = members;
 
     if (PyType_Ready(type) < 0)
-        return;
+        return -1;
     Py_INCREF(type);
 
     dict = type->tp_dict;
 #define SET_DICT_FROM_INT(key, value)                           \
     do {                                                        \
-        PyObject *v = PyLong_FromLong((long) value);            \
-        if (v != NULL) {                                        \
-            PyDict_SetItemString(dict, key, v);                 \
+        v = PyLong_FromLong((long) value);                      \
+        if (v == NULL)                                          \
+            return -1;                                          \
+        if (PyDict_SetItemString(dict, key, v) < 0) {           \
             Py_DECREF(v);                                       \
+            return -1;                                          \
         }                                                       \
+        Py_DECREF(v);                                           \
     } while (0)
 
     SET_DICT_FROM_INT(visible_length_key, desc->n_in_sequence);
     SET_DICT_FROM_INT(real_length_key, n_members);
     SET_DICT_FROM_INT(unnamed_fields_key, n_unnamed_members);
+
+    return 0;
+}
+
+void
+PyStructSequence_InitType(PyTypeObject *type, PyStructSequence_Desc *desc)
+{
+    (void)PyStructSequence_InitType2(type, desc);
 }
 
 PyTypeObject*
 PyStructSequence_NewType(PyStructSequence_Desc *desc)
 {
-    PyTypeObject *result = (PyTypeObject*)PyType_GenericAlloc(&PyType_Type, 0);
-    if (result != NULL) {
-        PyStructSequence_InitType(result, desc);
+    PyTypeObject *result;
+
+    result = (PyTypeObject*)PyType_GenericAlloc(&PyType_Type, 0);
+    if (result == NULL)
+        return NULL;
+    if (PyStructSequence_InitType2(result, desc) < 0) {
+        Py_DECREF(result);
+        return NULL;
     }
     return result;
 }
+
+int _PyStructSequence_Init(void)
+{
+    if (_PyUnicode_FromId(&PyId_n_sequence_fields) == NULL
+        || _PyUnicode_FromId(&PyId_n_fields) == NULL
+        || _PyUnicode_FromId(&PyId_n_unnamed_fields) == NULL)
+        return -1;
+
+    return 0;
+}
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index f815595..8c00210 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -63,9 +63,9 @@ _PyTuple_DebugMallocStats(FILE *out)
 }
 
 PyObject *
-PyTuple_New(register Py_ssize_t size)
+PyTuple_New(Py_ssize_t size)
 {
-    register PyTupleObject *op;
+    PyTupleObject *op;
     Py_ssize_t i;
     if (size < 0) {
         PyErr_BadInternalCall();
@@ -122,7 +122,7 @@ PyTuple_New(register Py_ssize_t size)
 }
 
 Py_ssize_t
-PyTuple_Size(register PyObject *op)
+PyTuple_Size(PyObject *op)
 {
     if (!PyTuple_Check(op)) {
         PyErr_BadInternalCall();
@@ -133,7 +133,7 @@ PyTuple_Size(register PyObject *op)
 }
 
 PyObject *
-PyTuple_GetItem(register PyObject *op, register Py_ssize_t i)
+PyTuple_GetItem(PyObject *op, Py_ssize_t i)
 {
     if (!PyTuple_Check(op)) {
         PyErr_BadInternalCall();
@@ -147,10 +147,10 @@ PyTuple_GetItem(register PyObject *op, register Py_ssize_t i)
 }
 
 int
-PyTuple_SetItem(register PyObject *op, register Py_ssize_t i, PyObject *newitem)
+PyTuple_SetItem(PyObject *op, Py_ssize_t i, PyObject *newitem)
 {
-    register PyObject *olditem;
-    register PyObject **p;
+    PyObject *olditem;
+    PyObject **p;
     if (!PyTuple_Check(op) || op->ob_refcnt != 1) {
         Py_XDECREF(newitem);
         PyErr_BadInternalCall();
@@ -224,10 +224,10 @@ PyTuple_Pack(Py_ssize_t n, ...)
 /* Methods */
 
 static void
-tupledealloc(register PyTupleObject *op)
+tupledealloc(PyTupleObject *op)
 {
-    register Py_ssize_t i;
-    register Py_ssize_t len =  Py_SIZE(op);
+    Py_ssize_t i;
+    Py_ssize_t len =  Py_SIZE(op);
     PyObject_GC_UnTrack(op);
     Py_TRASHCAN_SAFE_BEGIN(op)
     if (len > 0) {
@@ -255,20 +255,12 @@ static PyObject *
 tuplerepr(PyTupleObject *v)
 {
     Py_ssize_t i, n;
-    PyObject *s = NULL;
-    _PyAccu acc;
-    static PyObject *sep = NULL;
+    _PyUnicodeWriter writer;
 
     n = Py_SIZE(v);
     if (n == 0)
         return PyUnicode_FromString("()");
 
-    if (sep == NULL) {
-        sep = PyUnicode_FromString(", ");
-        if (sep == NULL)
-            return NULL;
-    }
-
     /* While not mutable, it is still possible to end up with a cycle in a
        tuple through an object that stores itself within a tuple (and thus
        infinitely asks for the repr of itself). This should only be
@@ -278,40 +270,58 @@ tuplerepr(PyTupleObject *v)
         return i > 0 ? PyUnicode_FromString("(...)") : NULL;
     }
 
-    if (_PyAccu_Init(&acc))
-        goto error;
+    _PyUnicodeWriter_Init(&writer);
+    writer.overallocate = 1;
+    if (Py_SIZE(v) > 1) {
+        /* "(" + "1" + ", 2" * (len - 1) + ")" */
+        writer.min_length = 1 + 1 + (2 + 1) * (Py_SIZE(v) - 1) + 1;
+    }
+    else {
+        /* "(1,)" */
+        writer.min_length = 4;
+    }
 
-    s = PyUnicode_FromString("(");
-    if (s == NULL || _PyAccu_Accumulate(&acc, s))
+    if (_PyUnicodeWriter_WriteChar(&writer, '(') < 0)
         goto error;
-    Py_CLEAR(s);
 
     /* Do repr() on each element. */
     for (i = 0; i < n; ++i) {
+        PyObject *s;
+
+        if (i > 0) {
+            if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0)
+                goto error;
+        }
+
         if (Py_EnterRecursiveCall(" while getting the repr of a tuple"))
             goto error;
         s = PyObject_Repr(v->ob_item[i]);
         Py_LeaveRecursiveCall();
-        if (i > 0 && _PyAccu_Accumulate(&acc, sep))
+        if (s == NULL)
             goto error;
-        if (s == NULL || _PyAccu_Accumulate(&acc, s))
+
+        if (_PyUnicodeWriter_WriteStr(&writer, s) < 0) {
+            Py_DECREF(s);
+            goto error;
+        }
+        Py_DECREF(s);
+    }
+
+    writer.overallocate = 0;
+    if (n > 1) {
+        if (_PyUnicodeWriter_WriteChar(&writer, ')') < 0)
+            goto error;
+    }
+    else {
+        if (_PyUnicodeWriter_WriteASCIIString(&writer, ",)", 2) < 0)
             goto error;
-        Py_CLEAR(s);
     }
-    if (n > 1)
-        s = PyUnicode_FromString(")");
-    else
-        s = PyUnicode_FromString(",)");
-    if (s == NULL || _PyAccu_Accumulate(&acc, s))
-        goto error;
-    Py_CLEAR(s);
 
     Py_ReprLeave((PyObject *)v);
-    return _PyAccu_Finish(&acc);
+    return _PyUnicodeWriter_Finish(&writer);
 
 error:
-    _PyAccu_Destroy(&acc);
-    Py_XDECREF(s);
+    _PyUnicodeWriter_Dealloc(&writer);
     Py_ReprLeave((PyObject *)v);
     return NULL;
 }
@@ -322,15 +332,18 @@ error:
 
      1082527, 1165049, 1082531, 1165057, 1247581, 1330103, 1082533,
      1330111, 1412633, 1165069, 1247599, 1495177, 1577699
+
+   Tests have shown that it's not worth caching the hash value, see
+   issue #9685.
 */
 
 static Py_hash_t
 tuplehash(PyTupleObject *v)
 {
-    register Py_uhash_t x;  /* Unsigned for defined overflow behavior. */
-    register Py_hash_t y;
-    register Py_ssize_t len = Py_SIZE(v);
-    register PyObject **p;
+    Py_uhash_t x;  /* Unsigned for defined overflow behavior. */
+    Py_hash_t y;
+    Py_ssize_t len = Py_SIZE(v);
+    PyObject **p;
     Py_uhash_t mult = _PyHASH_MULTIPLIER;
     x = 0x345678UL;
     p = v->ob_item;
@@ -367,7 +380,7 @@ tuplecontains(PyTupleObject *a, PyObject *el)
 }
 
 static PyObject *
-tupleitem(register PyTupleObject *a, register Py_ssize_t i)
+tupleitem(PyTupleObject *a, Py_ssize_t i)
 {
     if (i < 0 || i >= Py_SIZE(a)) {
         PyErr_SetString(PyExc_IndexError, "tuple index out of range");
@@ -378,12 +391,12 @@ tupleitem(register PyTupleObject *a, register Py_ssize_t i)
 }
 
 static PyObject *
-tupleslice(register PyTupleObject *a, register Py_ssize_t ilow,
-           register Py_ssize_t ihigh)
+tupleslice(PyTupleObject *a, Py_ssize_t ilow,
+           Py_ssize_t ihigh)
 {
-    register PyTupleObject *np;
+    PyTupleObject *np;
     PyObject **src, **dest;
-    register Py_ssize_t i;
+    Py_ssize_t i;
     Py_ssize_t len;
     if (ilow < 0)
         ilow = 0;
@@ -420,10 +433,10 @@ PyTuple_GetSlice(PyObject *op, Py_ssize_t i, Py_ssize_t j)
 }
 
 static PyObject *
-tupleconcat(register PyTupleObject *a, register PyObject *bb)
+tupleconcat(PyTupleObject *a, PyObject *bb)
 {
-    register Py_ssize_t size;
-    register Py_ssize_t i;
+    Py_ssize_t size;
+    Py_ssize_t i;
     PyObject **src, **dest;
     PyTupleObject *np;
     if (!PyTuple_Check(bb)) {
@@ -746,27 +759,15 @@ tuple_getnewargs(PyTupleObject *v)
 
 }
 
-static PyObject *
-tuple_sizeof(PyTupleObject *self)
-{
-    Py_ssize_t res;
-
-    res = PyTuple_Type.tp_basicsize + Py_SIZE(self) * sizeof(PyObject *);
-    return PyLong_FromSsize_t(res);
-}
-
 PyDoc_STRVAR(index_doc,
 "T.index(value, [start, [stop]]) -> integer -- return first index of value.\n"
 "Raises ValueError if the value is not present."
 );
 PyDoc_STRVAR(count_doc,
 "T.count(value) -> integer -- return number of occurrences of value");
-PyDoc_STRVAR(sizeof_doc,
-"T.__sizeof__() -- size of T in memory, in bytes");
 
 static PyMethodDef tuple_methods[] = {
     {"__getnewargs__",          (PyCFunction)tuple_getnewargs,  METH_NOARGS},
-    {"__sizeof__",      (PyCFunction)tuple_sizeof, METH_NOARGS, sizeof_doc},
     {"index",           (PyCFunction)tupleindex,  METH_VARARGS, index_doc},
     {"count",           (PyCFunction)tuplecount,  METH_O, count_doc},
     {NULL,              NULL}           /* sentinel */
@@ -833,8 +834,8 @@ PyTypeObject PyTuple_Type = {
 int
 _PyTuple_Resize(PyObject **pv, Py_ssize_t newsize)
 {
-    register PyTupleObject *v;
-    register PyTupleObject *sv;
+    PyTupleObject *v;
+    PyTupleObject *sv;
     Py_ssize_t i;
     Py_ssize_t oldsize;
 
@@ -925,7 +926,7 @@ PyTuple_Fini(void)
 
 typedef struct {
     PyObject_HEAD
-    long it_index;
+    Py_ssize_t it_index;
     PyTupleObject *it_seq; /* Set to NULL when iterator is exhausted */
 } tupleiterobject;
 
@@ -983,7 +984,7 @@ static PyObject *
 tupleiter_reduce(tupleiterobject *it)
 {
     if (it->it_seq)
-        return Py_BuildValue("N(O)l", _PyObject_GetBuiltin("iter"),
+        return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
                              it->it_seq, it->it_index);
     else
         return Py_BuildValue("N(())", _PyObject_GetBuiltin("iter"));
@@ -992,7 +993,7 @@ tupleiter_reduce(tupleiterobject *it)
 static PyObject *
 tupleiter_setstate(tupleiterobject *it, PyObject *state)
 {
-    long index = PyLong_AsLong(state);
+    Py_ssize_t index = PyLong_AsSsize_t(state);
     if (index == -1 && PyErr_Occurred())
         return NULL;
     if (it->it_seq != NULL) {
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 8851fae..ca5355a 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -35,19 +35,127 @@ struct method_cache_entry {
 static struct method_cache_entry method_cache[1 << MCACHE_SIZE_EXP];
 static unsigned int next_version_tag = 0;
 
+/* alphabetical order */
+_Py_IDENTIFIER(__abstractmethods__);
 _Py_IDENTIFIER(__class__);
+_Py_IDENTIFIER(__delitem__);
 _Py_IDENTIFIER(__dict__);
 _Py_IDENTIFIER(__doc__);
-_Py_IDENTIFIER(__getitem__);
 _Py_IDENTIFIER(__getattribute__);
+_Py_IDENTIFIER(__getitem__);
 _Py_IDENTIFIER(__hash__);
+_Py_IDENTIFIER(__len__);
 _Py_IDENTIFIER(__module__);
 _Py_IDENTIFIER(__name__);
 _Py_IDENTIFIER(__new__);
+_Py_IDENTIFIER(__setitem__);
+_Py_IDENTIFIER(builtins);
 
 static PyObject *
 slot_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
 
+/*
+ * finds the beginning of the docstring's introspection signature.
+ * if present, returns a pointer pointing to the first '('.
+ * otherwise returns NULL.
+ *
+ * doesn't guarantee that the signature is valid, only that it
+ * has a valid prefix.  (the signature must also pass skip_signature.)
+ */
+static const char *
+find_signature(const char *name, const char *doc)
+{
+    const char *dot;
+    size_t length;
+
+    if (!doc)
+        return NULL;
+
+    assert(name != NULL);
+
+    /* for dotted names like classes, only use the last component */
+    dot = strrchr(name, '.');
+    if (dot)
+        name = dot + 1;
+
+    length = strlen(name);
+    if (strncmp(doc, name, length))
+        return NULL;
+    doc += length;
+    if (*doc != '(')
+        return NULL;
+    return doc;
+}
+
+#define SIGNATURE_END_MARKER         ")\n--\n\n"
+#define SIGNATURE_END_MARKER_LENGTH  6
+/*
+ * skips past the end of the docstring's introspection signature.
+ * (assumes doc starts with a valid signature prefix.)
+ */
+static const char *
+skip_signature(const char *doc)
+{
+    while (*doc) {
+        if ((*doc == *SIGNATURE_END_MARKER) &&
+            !strncmp(doc, SIGNATURE_END_MARKER, SIGNATURE_END_MARKER_LENGTH))
+            return doc + SIGNATURE_END_MARKER_LENGTH;
+        if ((*doc == '\n') && (doc[1] == '\n'))
+            return NULL;
+        doc++;
+    }
+    return NULL;
+}
+
+static const char *
+_PyType_DocWithoutSignature(const char *name, const char *internal_doc)
+{
+    const char *doc = find_signature(name, internal_doc);
+
+    if (doc) {
+        doc = skip_signature(doc);
+        if (doc)
+            return doc;
+        }
+    return internal_doc;
+}
+
+PyObject *
+_PyType_GetDocFromInternalDoc(const char *name, const char *internal_doc)
+{
+    const char *doc = _PyType_DocWithoutSignature(name, internal_doc);
+
+    if (!doc) {
+        Py_INCREF(Py_None);
+        return Py_None;
+    }
+
+    return PyUnicode_FromString(doc);
+}
+
+PyObject *
+_PyType_GetTextSignatureFromInternalDoc(const char *name, const char *internal_doc)
+{
+    const char *start = find_signature(name, internal_doc);
+    const char *end;
+
+    if (start)
+        end = skip_signature(start);
+    else
+        end = NULL;
+    if (!end) {
+        Py_INCREF(Py_None);
+        return Py_None;
+    }
+
+    /* back "end" up until it points just past the final ')' */
+    end -= SIGNATURE_END_MARKER_LENGTH - 1;
+    assert((end - start) >= 2); /* should be "()" at least */
+    assert(end[-1] == ')');
+    assert(end[0] == '\n');
+    return PyUnicode_FromStringAndSize(start, end - start);
+}
+
 unsigned int
 PyType_ClearCache(void)
 {
@@ -94,16 +202,17 @@ PyType_Modified(PyTypeObject *type)
        needed.
      */
     PyObject *raw, *ref;
-    Py_ssize_t i, n;
+    Py_ssize_t i;
 
     if (!PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG))
         return;
 
     raw = type->tp_subclasses;
     if (raw != NULL) {
-        n = PyList_GET_SIZE(raw);
-        for (i = 0; i < n; i++) {
-            ref = PyList_GET_ITEM(raw, i);
+        assert(PyDict_CheckExact(raw));
+        i = 0;
+        while (PyDict_Next(raw, &i, NULL, &ref)) {
+            assert(PyWeakref_CheckRef(ref));
             ref = PyWeakref_GET_OBJECT(ref);
             if (ref != Py_None) {
                 PyType_Modified((PyTypeObject *)ref);
@@ -336,11 +445,10 @@ type_set_qualname(PyTypeObject *type, PyObject *value, void *context)
 static PyObject *
 type_module(PyTypeObject *type, void *context)
 {
-    PyObject *mod;
     char *s;
 
     if (type->tp_flags & Py_TPFLAGS_HEAPTYPE) {
-        mod = _PyDict_GetItemId(type->tp_dict, &PyId___module__);
+        PyObject *mod = _PyDict_GetItemId(type->tp_dict, &PyId___module__);
         if (!mod) {
             PyErr_Format(PyExc_AttributeError, "__module__");
             return 0;
@@ -349,11 +457,14 @@ type_module(PyTypeObject *type, void *context)
         return mod;
     }
     else {
+        PyObject *name;
         s = strrchr(type->tp_name, '.');
         if (s != NULL)
             return PyUnicode_FromStringAndSize(
                 type->tp_name, (Py_ssize_t)(s - type->tp_name));
-        return PyUnicode_FromString("builtins");
+        name = _PyUnicode_FromId(&PyId_builtins);
+        Py_XINCREF(name);
+        return name;
     }
 }
 
@@ -375,9 +486,11 @@ type_abstractmethods(PyTypeObject *type, void *context)
     /* type itself has an __abstractmethods__ descriptor (this). Don't return
        that. */
     if (type != &PyType_Type)
-        mod = PyDict_GetItemString(type->tp_dict, "__abstractmethods__");
+        mod = _PyDict_GetItemId(type->tp_dict, &PyId___abstractmethods__);
     if (!mod) {
-        PyErr_SetString(PyExc_AttributeError, "__abstractmethods__");
+        PyObject *message = _PyUnicode_FromId(&PyId___abstractmethods__);
+        if (message)
+            PyErr_SetObject(PyExc_AttributeError, message);
         return NULL;
     }
     Py_XINCREF(mod);
@@ -396,13 +509,15 @@ type_set_abstractmethods(PyTypeObject *type, PyObject *value, void *context)
         abstract = PyObject_IsTrue(value);
         if (abstract < 0)
             return -1;
-        res = PyDict_SetItemString(type->tp_dict, "__abstractmethods__", value);
+        res = _PyDict_SetItemId(type->tp_dict, &PyId___abstractmethods__, value);
     }
     else {
         abstract = 0;
-        res = PyDict_DelItemString(type->tp_dict, "__abstractmethods__");
+        res = _PyDict_DelItemId(type->tp_dict, &PyId___abstractmethods__);
         if (res && PyErr_ExceptionMatches(PyExc_KeyError)) {
-            PyErr_SetString(PyExc_AttributeError, "__abstractmethods__");
+            PyObject *message = _PyUnicode_FromId(&PyId___abstractmethods__);
+            if (message)
+                PyErr_SetObject(PyExc_AttributeError, message);
             return -1;
         }
     }
@@ -424,10 +539,13 @@ type_get_bases(PyTypeObject *type, void *context)
 }
 
 static PyTypeObject *best_base(PyObject *);
-static int mro_internal(PyTypeObject *);
+static int mro_internal(PyTypeObject *, PyObject **);
+Py_LOCAL_INLINE(int) type_is_subtype_base_chain(PyTypeObject *, PyTypeObject *);
 static int compatible_for_assignment(PyTypeObject *, PyTypeObject *, char *);
 static int add_subclass(PyTypeObject*, PyTypeObject*);
+static int add_all_subclasses(PyTypeObject *type, PyObject *bases);
 static void remove_subclass(PyTypeObject *, PyTypeObject *);
+static void remove_all_subclasses(PyTypeObject *type, PyObject *bases);
 static void update_all_slots(PyTypeObject *);
 
 typedef int (*update_callback)(PyTypeObject *, void *);
@@ -435,173 +553,194 @@ static int update_subclasses(PyTypeObject *type, PyObject *name,
                              update_callback callback, void *data);
 static int recurse_down_subclasses(PyTypeObject *type, PyObject *name,
                                    update_callback callback, void *data);
+static PyObject *type_subclasses(PyTypeObject *type, PyObject *ignored);
 
 static int
-mro_subclasses(PyTypeObject *type, PyObject* temp)
+mro_hierarchy(PyTypeObject *type, PyObject *temp)
 {
-    PyTypeObject *subclass;
-    PyObject *ref, *subclasses, *old_mro;
+    int res;
+    PyObject *new_mro, *old_mro;
+    PyObject *tuple;
+    PyObject *subclasses;
     Py_ssize_t i, n;
 
-    subclasses = type->tp_subclasses;
+    res = mro_internal(type, &old_mro);
+    if (res <= 0)
+        /* error / reentrance */
+        return res;
+    new_mro = type->tp_mro;
+
+    if (old_mro != NULL)
+        tuple = PyTuple_Pack(3, type, new_mro, old_mro);
+    else
+        tuple = PyTuple_Pack(2, type, new_mro);
+
+    if (tuple != NULL)
+        res = PyList_Append(temp, tuple);
+    else
+        res = -1;
+    Py_XDECREF(tuple);
+
+    if (res < 0) {
+        type->tp_mro = old_mro;
+        Py_DECREF(new_mro);
+        return -1;
+    }
+    Py_XDECREF(old_mro);
+
+    /* Obtain a copy of subclasses list to iterate over.
+
+       Otherwise type->tp_subclasses might be altered
+       in the middle of the loop, for example, through a custom mro(),
+       by invoking type_set_bases on some subclass of the type
+       which in turn calls remove_subclass/add_subclass on this type.
+
+       Finally, this makes things simple avoiding the need to deal
+       with dictionary iterators and weak references.
+    */
+    subclasses = type_subclasses(type, NULL);
     if (subclasses == NULL)
-        return 0;
-    assert(PyList_Check(subclasses));
+        return -1;
     n = PyList_GET_SIZE(subclasses);
     for (i = 0; i < n; i++) {
-        ref = PyList_GET_ITEM(subclasses, i);
-        assert(PyWeakref_CheckRef(ref));
-        subclass = (PyTypeObject *)PyWeakref_GET_OBJECT(ref);
-        assert(subclass != NULL);
-        if ((PyObject *)subclass == Py_None)
-            continue;
-        assert(PyType_Check(subclass));
-        old_mro = subclass->tp_mro;
-        if (mro_internal(subclass) < 0) {
-            subclass->tp_mro = old_mro;
-            return -1;
-        }
-        else {
-            PyObject* tuple;
-            tuple = PyTuple_Pack(2, subclass, old_mro);
-            Py_DECREF(old_mro);
-            if (!tuple)
-                return -1;
-            if (PyList_Append(temp, tuple) < 0)
-                return -1;
-            Py_DECREF(tuple);
-        }
-        if (mro_subclasses(subclass, temp) < 0)
-            return -1;
+        PyTypeObject *subclass;
+        subclass = (PyTypeObject *)PyList_GET_ITEM(subclasses, i);
+        res = mro_hierarchy(subclass, temp);
+        if (res < 0)
+            break;
     }
-    return 0;
+    Py_DECREF(subclasses);
+
+    return res;
 }
 
 static int
-type_set_bases(PyTypeObject *type, PyObject *value, void *context)
+type_set_bases(PyTypeObject *type, PyObject *new_bases, void *context)
 {
-    Py_ssize_t i;
-    int r = 0;
-    PyObject *ob, *temp;
+    int res = 0;
+    PyObject *temp;
+    PyObject *old_bases;
     PyTypeObject *new_base, *old_base;
-    PyObject *old_bases, *old_mro;
+    Py_ssize_t i;
 
-    if (!check_set_special_type_attr(type, value, "__bases__"))
+    if (!check_set_special_type_attr(type, new_bases, "__bases__"))
         return -1;
-    if (!PyTuple_Check(value)) {
+    if (!PyTuple_Check(new_bases)) {
         PyErr_Format(PyExc_TypeError,
              "can only assign tuple to %s.__bases__, not %s",
-                 type->tp_name, Py_TYPE(value)->tp_name);
+                 type->tp_name, Py_TYPE(new_bases)->tp_name);
         return -1;
     }
-    if (PyTuple_GET_SIZE(value) == 0) {
+    if (PyTuple_GET_SIZE(new_bases) == 0) {
         PyErr_Format(PyExc_TypeError,
              "can only assign non-empty tuple to %s.__bases__, not ()",
                  type->tp_name);
         return -1;
     }
-    for (i = 0; i < PyTuple_GET_SIZE(value); i++) {
-        ob = PyTuple_GET_ITEM(value, i);
+    for (i = 0; i < PyTuple_GET_SIZE(new_bases); i++) {
+        PyObject *ob;
+        PyTypeObject *base;
+
+        ob = PyTuple_GET_ITEM(new_bases, i);
         if (!PyType_Check(ob)) {
             PyErr_Format(PyExc_TypeError,
                          "%s.__bases__ must be tuple of classes, not '%s'",
                          type->tp_name, Py_TYPE(ob)->tp_name);
             return -1;
         }
-        if (PyType_IsSubtype((PyTypeObject*)ob, type)) {
+
+        base = (PyTypeObject*)ob;
+        if (PyType_IsSubtype(base, type) ||
+            /* In case of reentering here again through a custom mro()
+               the above check is not enough since it relies on
+               base->tp_mro which is going to be updated inside
+               mro_internal only upon returning from the mro().
+
+               However, base->tp_base has already been assigned (see
+               below), which in turn may cause an inheritance cycle
+               through tp_base chain.  And this is definitely
+               not what you want to ever happen.  */
+            (base->tp_mro != NULL && type_is_subtype_base_chain(base, type))) {
+
             PyErr_SetString(PyExc_TypeError,
                             "a __bases__ item causes an inheritance cycle");
             return -1;
         }
     }
 
-    new_base = best_base(value);
-
-    if (!new_base)
+    new_base = best_base(new_bases);
+    if (new_base == NULL)
         return -1;
 
     if (!compatible_for_assignment(type->tp_base, new_base, "__bases__"))
         return -1;
 
+    Py_INCREF(new_bases);
     Py_INCREF(new_base);
-    Py_INCREF(value);
 
     old_bases = type->tp_bases;
     old_base = type->tp_base;
-    old_mro = type->tp_mro;
 
-    type->tp_bases = value;
+    type->tp_bases = new_bases;
     type->tp_base = new_base;
 
-    if (mro_internal(type) < 0) {
-        goto bail;
-    }
-
     temp = PyList_New(0);
-    if (!temp)
+    if (temp == NULL)
         goto bail;
-
-    r = mro_subclasses(type, temp);
-
-    if (r < 0) {
-        for (i = 0; i < PyList_Size(temp); i++) {
-            PyTypeObject* cls;
-            PyObject* mro;
-            PyArg_UnpackTuple(PyList_GET_ITEM(temp, i),
-                             "", 2, 2, &cls, &mro);
-            Py_INCREF(mro);
-            ob = cls->tp_mro;
-            cls->tp_mro = mro;
-            Py_DECREF(ob);
-        }
-        Py_DECREF(temp);
-        goto bail;
-    }
-
+    if (mro_hierarchy(type, temp) < 0)
+        goto undo;
     Py_DECREF(temp);
 
-    /* any base that was in __bases__ but now isn't, we
-       need to remove |type| from its tp_subclasses.
-       conversely, any class now in __bases__ that wasn't
-       needs to have |type| added to its subclasses. */
-
-    /* for now, sod that: just remove from all old_bases,
-       add to all new_bases */
+    /* Take no action if type->tp_bases has been replaced
+       through reentrance.  */
+    if (type->tp_bases == new_bases) {
+        /* any base that was in __bases__ but now isn't, we
+           need to remove |type| from its tp_subclasses.
+           conversely, any class now in __bases__ that wasn't
+           needs to have |type| added to its subclasses. */
 
-    for (i = PyTuple_GET_SIZE(old_bases) - 1; i >= 0; i--) {
-        ob = PyTuple_GET_ITEM(old_bases, i);
-        if (PyType_Check(ob)) {
-            remove_subclass(
-                (PyTypeObject*)ob, type);
-        }
+        /* for now, sod that: just remove from all old_bases,
+           add to all new_bases */
+        remove_all_subclasses(type, old_bases);
+        res = add_all_subclasses(type, new_bases);
+        update_all_slots(type);
     }
 
-    for (i = PyTuple_GET_SIZE(value) - 1; i >= 0; i--) {
-        ob = PyTuple_GET_ITEM(value, i);
-        if (PyType_Check(ob)) {
-            if (add_subclass((PyTypeObject*)ob, type) < 0)
-                r = -1;
-        }
-    }
-
-    update_all_slots(type);
-
     Py_DECREF(old_bases);
     Py_DECREF(old_base);
-    Py_DECREF(old_mro);
 
-    return r;
+    return res;
 
-  bail:
-    Py_DECREF(type->tp_bases);
-    Py_DECREF(type->tp_base);
-    if (type->tp_mro != old_mro) {
-        Py_DECREF(type->tp_mro);
+  undo:
+    for (i = PyList_GET_SIZE(temp) - 1; i >= 0; i--) {
+        PyTypeObject *cls;
+        PyObject *new_mro, *old_mro = NULL;
+
+        PyArg_UnpackTuple(PyList_GET_ITEM(temp, i),
+                          "", 2, 3, &cls, &new_mro, &old_mro);
+        /* Do not rollback if cls has a newer version of MRO.  */
+        if (cls->tp_mro == new_mro) {
+            Py_XINCREF(old_mro);
+            cls->tp_mro = old_mro;
+            Py_DECREF(new_mro);
+        }
     }
+    Py_DECREF(temp);
+
+  bail:
+    if (type->tp_bases == new_bases) {
+        assert(type->tp_base == new_base);
 
-    type->tp_bases = old_bases;
-    type->tp_base = old_base;
-    type->tp_mro = old_mro;
+        type->tp_bases = old_bases;
+        type->tp_base = old_base;
+
+        Py_DECREF(new_bases);
+        Py_DECREF(new_base);
+    }
+    else {
+        Py_DECREF(old_bases);
+        Py_DECREF(old_base);
+    }
 
     return -1;
 }
@@ -620,8 +759,9 @@ static PyObject *
 type_get_doc(PyTypeObject *type, void *context)
 {
     PyObject *result;
-    if (!(type->tp_flags & Py_TPFLAGS_HEAPTYPE) && type->tp_doc != NULL)
-        return PyUnicode_FromString(type->tp_doc);
+    if (!(type->tp_flags & Py_TPFLAGS_HEAPTYPE) && type->tp_doc != NULL) {
+        return _PyType_GetDocFromInternalDoc(type->tp_name, type->tp_doc);
+    }
     result = _PyDict_GetItemId(type->tp_dict, &PyId___doc__);
     if (result == NULL) {
         result = Py_None;
@@ -637,6 +777,12 @@ type_get_doc(PyTypeObject *type, void *context)
     return result;
 }
 
+static PyObject *
+type_get_text_signature(PyTypeObject *type, void *context)
+{
+    return _PyType_GetTextSignatureFromInternalDoc(type->tp_name, type->tp_doc);
+}
+
 static int
 type_set_doc(PyTypeObject *type, PyObject *value, void *context)
 {
@@ -683,6 +829,7 @@ static PyGetSetDef type_getsets[] = {
      (setter)type_set_abstractmethods, NULL},
     {"__dict__",  (getter)type_dict,  NULL, NULL},
     {"__doc__", (getter)type_get_doc, (setter)type_set_doc, NULL},
+    {"__text_signature__", (getter)type_get_text_signature, NULL, NULL},
     {0}
 };
 
@@ -704,7 +851,7 @@ type_repr(PyTypeObject *type)
         return NULL;
     }
 
-    if (mod != NULL && PyUnicode_CompareWithASCIIString(mod, "builtins"))
+    if (mod != NULL && _PyUnicode_CompareWithId(mod, &PyId_builtins))
         rtn = PyUnicode_FromFormat("<class '%U.%U'>", mod, name);
     else
         rtn = PyUnicode_FromFormat("<class '%s'>", type->tp_name);
@@ -726,6 +873,13 @@ type_call(PyTypeObject *type, PyObject *args, PyObject *kwds)
         return NULL;
     }
 
+#ifdef Py_DEBUG
+    /* type_call() must not be called with an exception set,
+       because it may clear it (directly or indirectly) and so the
+       caller loses its exception */
+    assert(!PyErr_Occurred());
+#endif
+
     obj = type->tp_new(type, args, kwds);
     if (obj != NULL) {
         /* Ugly exception: when the call was type(something),
@@ -740,10 +894,12 @@ type_call(PyTypeObject *type, PyObject *args, PyObject *kwds)
         if (!PyType_IsSubtype(Py_TYPE(obj), type))
             return obj;
         type = Py_TYPE(obj);
-        if (type->tp_init != NULL &&
-            type->tp_init(obj, args, kwds) < 0) {
-            Py_DECREF(obj);
-            obj = NULL;
+        if (type->tp_init != NULL) {
+            int res = type->tp_init(obj, args, kwds);
+            if (res < 0) {
+                Py_DECREF(obj);
+                obj = NULL;
+            }
         }
     }
     return obj;
@@ -770,7 +926,7 @@ PyType_GenericAlloc(PyTypeObject *type, Py_ssize_t nitems)
         Py_INCREF(type);
 
     if (type->tp_itemsize == 0)
-        PyObject_INIT(obj, type);
+        (void)PyObject_INIT(obj, type);
     else
         (void) PyObject_INIT_VAR((PyVarObject *)obj, type, nitems);
 
@@ -902,6 +1058,7 @@ subtype_dealloc(PyObject *self)
     PyTypeObject *type, *base;
     destructor basedealloc;
     PyThreadState *tstate = PyThreadState_GET();
+    int has_finalizer;
 
     /* Extract the type; we expect it to be a heap type */
     type = Py_TYPE(self);
@@ -917,6 +1074,10 @@ subtype_dealloc(PyObject *self)
            clear_slots(), or DECREF the dict, or clear weakrefs. */
 
         /* Maybe call finalizer; exit early if resurrected */
+        if (type->tp_finalize) {
+            if (PyObject_CallFinalizerFromDealloc(self) < 0)
+                return;
+        }
         if (type->tp_del) {
             type->tp_del(self);
             if (self->ob_refcnt > 0)
@@ -968,25 +1129,36 @@ subtype_dealloc(PyObject *self)
         assert(base);
     }
 
-    /* If we added a weaklist, we clear it.      Do this *before* calling
-       the finalizer (__del__), clearing slots, or clearing the instance
-       dict. */
+    has_finalizer = type->tp_finalize || type->tp_del;
+
+    /* Maybe call finalizer; exit early if resurrected */
+    if (has_finalizer)
+        _PyObject_GC_TRACK(self);
 
+    if (type->tp_finalize) {
+        if (PyObject_CallFinalizerFromDealloc(self) < 0) {
+            /* Resurrected */
+            goto endlabel;
+        }
+    }
+    /* If we added a weaklist, we clear it.      Do this *before* calling
+       tp_del, clearing slots, or clearing the instance dict. */
     if (type->tp_weaklistoffset && !base->tp_weaklistoffset)
         PyObject_ClearWeakRefs(self);
 
-    /* Maybe call finalizer; exit early if resurrected */
     if (type->tp_del) {
-        _PyObject_GC_TRACK(self);
         type->tp_del(self);
-        if (self->ob_refcnt > 0)
-            goto endlabel;              /* resurrected */
-        else
-            _PyObject_GC_UNTRACK(self);
+        if (self->ob_refcnt > 0) {
+            /* Resurrected */
+            goto endlabel;
+        }
+    }
+    if (has_finalizer) {
+        _PyObject_GC_UNTRACK(self);
         /* New weakrefs could be created during the finalizer call.
-            If this occurs, clear them out without calling their
-            finalizers since they might rely on part of the object
-            being finalized that has already been destroyed. */
+           If this occurs, clear them out without calling their
+           finalizers since they might rely on part of the object
+           being finalized that has already been destroyed. */
         if (type->tp_weaklistoffset && !base->tp_weaklistoffset) {
             /* Modeled after GET_WEAKREFS_LISTPTR() */
             PyWeakReference **list = (PyWeakReference **) \
@@ -1141,6 +1313,18 @@ static PyTypeObject *solid_base(PyTypeObject *type);
 
 /* type test with subclassing support */
 
+Py_LOCAL_INLINE(int)
+type_is_subtype_base_chain(PyTypeObject *a, PyTypeObject *b)
+{
+    do {
+        if (a == b)
+            return 1;
+        a = a->tp_base;
+    } while (a != NULL);
+
+    return (b == &PyBaseObject_Type);
+}
+
 int
 PyType_IsSubtype(PyTypeObject *a, PyTypeObject *b)
 {
@@ -1159,15 +1343,9 @@ PyType_IsSubtype(PyTypeObject *a, PyTypeObject *b)
         }
         return 0;
     }
-    else {
+    else
         /* a is not completely initilized yet; follow tp_base */
-        do {
-            if (a == b)
-                return 1;
-            a = a->tp_base;
-        } while (a != NULL);
-        return b == &PyBaseObject_Type;
-    }
+        return type_is_subtype_base_chain(a, b);
 }
 
 /* Internal routines to do a method lookup in the type
@@ -1434,10 +1612,11 @@ consistent method resolution\norder (MRO) for bases");
 }
 
 static int
-pmerge(PyObject *acc, PyObject* to_merge) {
+pmerge(PyObject *acc, PyObject* to_merge)
+{
+    int res = 0;
     Py_ssize_t i, j, to_merge_size, empty_cnt;
     int *remain;
-    int ok;
 
     to_merge_size = PyList_GET_SIZE(to_merge);
 
@@ -1446,8 +1625,10 @@ pmerge(PyObject *acc, PyObject* to_merge) {
        that is not included in acc.
     */
     remain = (int *)PyMem_MALLOC(SIZEOF_INT*to_merge_size);
-    if (remain == NULL)
+    if (remain == NULL) {
+        PyErr_NoMemory();
         return -1;
+    }
     for (i = 0; i < to_merge_size; i++)
         remain[i] = 0;
 
@@ -1473,15 +1654,13 @@ pmerge(PyObject *acc, PyObject* to_merge) {
         candidate = PyList_GET_ITEM(cur_list, remain[i]);
         for (j = 0; j < to_merge_size; j++) {
             PyObject *j_lst = PyList_GET_ITEM(to_merge, j);
-            if (tail_contains(j_lst, remain[j], candidate)) {
+            if (tail_contains(j_lst, remain[j], candidate))
                 goto skip; /* continue outer loop */
-            }
-        }
-        ok = PyList_Append(acc, candidate);
-        if (ok < 0) {
-            PyMem_Free(remain);
-            return -1;
         }
+        res = PyList_Append(acc, candidate);
+        if (res < 0)
+            goto out;
+
         for (j = 0; j < to_merge_size; j++) {
             PyObject *j_lst = PyList_GET_ITEM(to_merge, j);
             if (remain[j] < PyList_GET_SIZE(j_lst) &&
@@ -1493,22 +1672,25 @@ pmerge(PyObject *acc, PyObject* to_merge) {
       skip: ;
     }
 
-    if (empty_cnt == to_merge_size) {
-        PyMem_FREE(remain);
-        return 0;
+    if (empty_cnt != to_merge_size) {
+        set_mro_error(to_merge, remain);
+        res = -1;
     }
-    set_mro_error(to_merge, remain);
+
+  out:
     PyMem_FREE(remain);
-    return -1;
+
+    return res;
 }
 
 static PyObject *
 mro_implementation(PyTypeObject *type)
 {
-    Py_ssize_t i, n;
-    int ok;
-    PyObject *bases, *result;
+    PyObject *result = NULL;
+    PyObject *bases;
     PyObject *to_merge, *bases_aslist;
+    int res;
+    Py_ssize_t i, n;
 
     if (type->tp_dict == NULL) {
         if (PyType_Ready(type) < 0)
@@ -1532,42 +1714,44 @@ mro_implementation(PyTypeObject *type)
         return NULL;
 
     for (i = 0; i < n; i++) {
-        PyObject *base = PyTuple_GET_ITEM(bases, i);
-        PyObject *parentMRO;
-        parentMRO = PySequence_List(((PyTypeObject*)base)->tp_mro);
-        if (parentMRO == NULL) {
-            Py_DECREF(to_merge);
-            return NULL;
+        PyTypeObject *base;
+        PyObject *base_mro_aslist;
+
+        base = (PyTypeObject *)PyTuple_GET_ITEM(bases, i);
+        if (base->tp_mro == NULL) {
+            PyErr_Format(PyExc_TypeError,
+                         "Cannot extend an incomplete type '%.100s'",
+                         base->tp_name);
+            goto out;
         }
 
-        PyList_SET_ITEM(to_merge, i, parentMRO);
+        base_mro_aslist = PySequence_List(base->tp_mro);
+        if (base_mro_aslist == NULL)
+            goto out;
+
+        PyList_SET_ITEM(to_merge, i, base_mro_aslist);
     }
 
     bases_aslist = PySequence_List(bases);
-    if (bases_aslist == NULL) {
-        Py_DECREF(to_merge);
-        return NULL;
-    }
+    if (bases_aslist == NULL)
+        goto out;
     /* This is just a basic sanity check. */
     if (check_duplicates(bases_aslist) < 0) {
-        Py_DECREF(to_merge);
         Py_DECREF(bases_aslist);
-        return NULL;
+        goto out;
     }
     PyList_SET_ITEM(to_merge, n, bases_aslist);
 
     result = Py_BuildValue("[O]", (PyObject *)type);
-    if (result == NULL) {
-        Py_DECREF(to_merge);
-        return NULL;
-    }
+    if (result == NULL)
+        goto out;
 
-    ok = pmerge(result, to_merge);
+    res = pmerge(result, to_merge);
+    if (res < 0)
+        Py_CLEAR(result);
+
+  out:
     Py_DECREF(to_merge);
-    if (ok < 0) {
-        Py_DECREF(result);
-        return NULL;
-    }
 
     return result;
 }
@@ -1581,59 +1765,133 @@ mro_external(PyObject *self)
 }
 
 static int
-mro_internal(PyTypeObject *type)
+mro_check(PyTypeObject *type, PyObject *mro)
 {
-    PyObject *mro, *result, *tuple;
-    int checkit = 0;
+    PyTypeObject *solid;
+    Py_ssize_t i, n;
+
+    solid = solid_base(type);
 
-    if (Py_TYPE(type) == &PyType_Type) {
-        result = mro_implementation(type);
+    n = PyTuple_GET_SIZE(mro);
+    for (i = 0; i < n; i++) {
+        PyTypeObject *base;
+        PyObject *tmp;
+
+        tmp = PyTuple_GET_ITEM(mro, i);
+        if (!PyType_Check(tmp)) {
+            PyErr_Format(
+                PyExc_TypeError,
+                "mro() returned a non-class ('%.500s')",
+                Py_TYPE(tmp)->tp_name);
+            return -1;
+        }
+
+        base = (PyTypeObject*)tmp;
+        if (!PyType_IsSubtype(solid, solid_base(base))) {
+            PyErr_Format(
+                PyExc_TypeError,
+                "mro() returned base with unsuitable layout ('%.500s')",
+                base->tp_name);
+            return -1;
+        }
     }
-    else {
+
+    return 0;
+}
+
+/* Looks up the mcls.mro method, invokes it and checks the result (if needed,
+   in case of a custom mro() implementation).
+
+   Keep in mind that during execution of this function type->tp_mro
+   can be replaced due to possible reentrance (for example,
+   through type_set_bases):
+
+      - when looking up the mcls.mro attribute (it could be
+        a user-provided descriptor);
+
+      - from inside a custom mro() itself;
+
+      - through a finalizer of the return value of mro().
+*/
+static PyObject *
+mro_invoke(PyTypeObject *type)
+{
+    PyObject *mro_result;
+    PyObject *new_mro;
+    int custom = (Py_TYPE(type) != &PyType_Type);
+
+    if (custom) {
         _Py_IDENTIFIER(mro);
-        checkit = 1;
-        mro = lookup_method((PyObject *)type, &PyId_mro);
-        if (mro == NULL)
-            return -1;
-        result = PyObject_CallObject(mro, NULL);
-        Py_DECREF(mro);
+        PyObject *mro_meth = lookup_method((PyObject *)type, &PyId_mro);
+        if (mro_meth == NULL)
+            return NULL;
+        mro_result = PyObject_CallObject(mro_meth, NULL);
+        Py_DECREF(mro_meth);
     }
-    if (result == NULL)
-        return -1;
-    tuple = PySequence_Tuple(result);
-    Py_DECREF(result);
-    if (tuple == NULL)
-        return -1;
-    if (checkit) {
-        Py_ssize_t i, len;
-        PyObject *cls;
-        PyTypeObject *solid;
+    else {
+        mro_result = mro_implementation(type);
+    }
+    if (mro_result == NULL)
+        return NULL;
 
-        solid = solid_base(type);
+    new_mro = PySequence_Tuple(mro_result);
+    Py_DECREF(mro_result);
+    if (new_mro == NULL)
+        return NULL;
 
-        len = PyTuple_GET_SIZE(tuple);
+    if (custom && mro_check(type, new_mro) < 0) {
+        Py_DECREF(new_mro);
+        return NULL;
+    }
 
-        for (i = 0; i < len; i++) {
-            PyTypeObject *t;
-            cls = PyTuple_GET_ITEM(tuple, i);
-            if (!PyType_Check(cls)) {
-                PyErr_Format(PyExc_TypeError,
-                 "mro() returned a non-class ('%.500s')",
-                                 Py_TYPE(cls)->tp_name);
-                Py_DECREF(tuple);
-                return -1;
-            }
-            t = (PyTypeObject*)cls;
-            if (!PyType_IsSubtype(solid, solid_base(t))) {
-                PyErr_Format(PyExc_TypeError,
-             "mro() returned base with unsuitable layout ('%.500s')",
-                                     t->tp_name);
-                        Py_DECREF(tuple);
-                        return -1;
-            }
-        }
+    return new_mro;
+}
+
+/* Calculates and assigns a new MRO to type->tp_mro.
+   Return values and invariants:
+
+     - Returns 1 if a new MRO value has been set to type->tp_mro due to
+       this call of mro_internal (no tricky reentrancy and no errors).
+
+       If the p_old_mro argument is not NULL, the previous value
+       of type->tp_mro is stored there, and ownership of that
+       reference is transferred to the caller.
+       Otherwise, the previous value (if any) is decref'ed.
+
+     - Returns 0 when type->tp_mro has been changed by reentering
+       here through a custom mro() (see the comment on mro_invoke).
+
+       In this case, the refcount of the old type->tp_mro is adjusted
+       somewhere deeper in the call stack (by the innermost mro_internal
+       or its caller) and may become zero upon returning from here.
+       This also implies that the whole hierarchy of subclasses of the type
+       has seen the new value and updated their MRO accordingly.
+
+     - Returns -1 in case of an error.
+*/
+static int
+mro_internal(PyTypeObject *type, PyObject **p_old_mro)
+{
+    PyObject *new_mro, *old_mro;
+    int reent;
+
+    /* Keep a reference to be able to do a reentrancy check below.
+       Don't let old_mro be GC'ed and its address be reused for
+       another object, like (suddenly!) a new tp_mro.  */
+    old_mro = type->tp_mro;
+    Py_XINCREF(old_mro);
+    new_mro = mro_invoke(type);  /* might cause reentrance */
+    reent = (type->tp_mro != old_mro);
+    Py_XDECREF(old_mro);
+    if (new_mro == NULL)
+        return -1;
+
+    if (reent) {
+        Py_DECREF(new_mro);
+        return 0;
     }
-    type->tp_mro = tuple;
+
+    type->tp_mro = new_mro;
 
     type_mro_modified(type, type->tp_mro);
     /* corner case: the super class might have been hidden
@@ -1642,7 +1900,12 @@ mro_internal(PyTypeObject *type)
 
     PyType_Modified(type);
 
-    return 0;
+    if (p_old_mro != NULL)
+        *p_old_mro = old_mro;  /* transfer the ownership */
+    else
+        Py_XDECREF(old_mro);
+
+    return 1;
 }
 
 
@@ -1674,6 +1937,12 @@ best_base(PyObject *bases)
             if (PyType_Ready(base_i) < 0)
                 return NULL;
         }
+        if (!PyType_HasFeature(base_i, Py_TPFLAGS_BASETYPE)) {
+            PyErr_Format(PyExc_TypeError,
+                         "type '%.100s' is not an acceptable base type",
+                         base_i->tp_name);
+            return NULL;
+        }
         candidate = solid_base(base_i);
         if (winner == NULL) {
             winner = candidate;
@@ -1939,7 +2208,7 @@ type_init(PyObject *cls, PyObject *args, PyObject *kwds)
     return res;
 }
 
-long
+unsigned long
 PyType_GetFlags(PyTypeObject *type)
 {
     return type->tp_flags;
@@ -2054,12 +2323,6 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
     if (base == NULL) {
         goto error;
     }
-    if (!PyType_HasFeature(base, Py_TPFLAGS_BASETYPE)) {
-        PyErr_Format(PyExc_TypeError,
-                     "type '%.100s' is not an acceptable base type",
-                     base->tp_name);
-        goto error;
-    }
 
     dict = PyDict_Copy(orig_dict);
     if (dict == NULL)
@@ -2108,7 +2371,7 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
             if (!valid_identifier(tmp))
                 goto error;
             assert(PyUnicode_Check(tmp));
-            if (PyUnicode_CompareWithASCIIString(tmp, "__dict__") == 0) {
+            if (_PyUnicode_CompareWithId(tmp, &PyId___dict__) == 0) {
                 if (!may_add_dict || add_dict) {
                     PyErr_SetString(PyExc_TypeError,
                         "__dict__ slot disallowed: "
@@ -2139,7 +2402,7 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
         for (i = j = 0; i < nslots; i++) {
             tmp = PyTuple_GET_ITEM(slots, i);
             if ((add_dict &&
-                 PyUnicode_CompareWithASCIIString(tmp, "__dict__") == 0) ||
+                 _PyUnicode_CompareWithId(tmp, &PyId___dict__) == 0) ||
                 (add_weak &&
                  PyUnicode_CompareWithASCIIString(tmp, "__weakref__") == 0))
                 continue;
@@ -2210,7 +2473,7 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
 
     /* Initialize tp_flags */
     type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HEAPTYPE |
-        Py_TPFLAGS_BASETYPE;
+        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_FINALIZE;
     if (base->tp_flags & Py_TPFLAGS_HAVE_GC)
         type->tp_flags |= Py_TPFLAGS_HAVE_GC;
 
@@ -2280,8 +2543,10 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
             /* Silently truncate the docstring if it contains null bytes. */
             len = strlen(doc_str);
             tp_doc = (char *)PyObject_MALLOC(len + 1);
-            if (tp_doc == NULL)
+            if (tp_doc == NULL) {
+                PyErr_NoMemory();
                 goto error;
+            }
             memcpy(tp_doc, doc_str, len + 1);
             type->tp_doc = tp_doc;
         }
@@ -2325,9 +2590,6 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
             type->tp_dictoffset = slotoffset;
         slotoffset += sizeof(PyObject *);
     }
-    if (type->tp_dictoffset) {
-        et->ht_cached_keys = _PyDict_NewKeysForClass();
-    }
     if (add_weak) {
         assert(!base->tp_itemsize);
         type->tp_weaklistoffset = slotoffset;
@@ -2377,6 +2639,10 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
     /* Put the proper slots in place */
     fixup_slot_dispatchers(type);
 
+    if (type->tp_dictoffset) {
+        et->ht_cached_keys = _PyDict_NewKeysForClass();
+    }
+
     Py_DECREF(dict);
     return (PyObject *)type;
 
@@ -2401,7 +2667,7 @@ PyType_FromSpecWithBases(PyType_Spec *spec, PyObject *bases)
     char *s;
     char *res_start = (char*)res;
     PyType_Slot *slot;
-    
+
     /* Set the type name and qualname */
     s = strrchr(spec->name, '.');
     if (s == NULL)
@@ -2422,7 +2688,7 @@ PyType_FromSpecWithBases(PyType_Spec *spec, PyObject *bases)
     type->tp_name = spec->name;
     if (!type->tp_name)
         goto fail;
-    
+
     /* Adjust for empty tuple bases */
     if (!bases) {
         base = &PyBaseObject_Type;
@@ -2482,17 +2748,17 @@ PyType_FromSpecWithBases(PyType_Spec *spec, PyObject *bases)
         /* need to make a copy of the docstring slot, which usually
            points to a static string literal */
         if (slot->slot == Py_tp_doc) {
-            size_t len = strlen(slot->pfunc)+1;
+            const char *old_doc = _PyType_DocWithoutSignature(type->tp_name, slot->pfunc);
+            size_t len = strlen(old_doc)+1;
             char *tp_doc = PyObject_MALLOC(len);
-            if (tp_doc == NULL)
+            if (tp_doc == NULL) {
+                PyErr_NoMemory();
                 goto fail;
-            memcpy(tp_doc, slot->pfunc, len);
+            }
+            memcpy(tp_doc, old_doc, len);
             type->tp_doc = tp_doc;
         }
     }
-    if (type->tp_dictoffset) {
-        res->ht_cached_keys = _PyDict_NewKeysForClass();
-    }
     if (type->tp_dealloc == NULL) {
         /* It's a heap type, so needs the heap types' dealloc.
            subtype_dealloc will call the base type's tp_dealloc, if
@@ -2503,10 +2769,14 @@ PyType_FromSpecWithBases(PyType_Spec *spec, PyObject *bases)
     if (PyType_Ready(type) < 0)
         goto fail;
 
+    if (type->tp_dictoffset) {
+        res->ht_cached_keys = _PyDict_NewKeysForClass();
+    }
+
     /* Set type.__module__ */
     s = strrchr(spec->name, '.');
     if (s != NULL)
-        _PyDict_SetItemId(type->tp_dict, &PyId___module__, 
+        _PyDict_SetItemId(type->tp_dict, &PyId___module__,
             PyUnicode_FromStringAndSize(
                 spec->name, (Py_ssize_t)(s - spec->name)));
 
@@ -2523,6 +2793,19 @@ PyType_FromSpec(PyType_Spec *spec)
     return PyType_FromSpecWithBases(spec, NULL);
 }
 
+void *
+PyType_GetSlot(PyTypeObject *type, int slot)
+{
+    if (!PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+    if (slot >= Py_ARRAY_LENGTH(slotoffsets)) {
+        /* Extension module requesting slot from a future version */
+        return NULL;
+    }
+    return  *(void**)(((char*)type) + slotoffsets[slot]);
+}
 
 /* Internal API to look for a name through the MRO.
    This returns a borrowed reference, and doesn't set an exception! */
@@ -2695,10 +2978,14 @@ static void
 type_dealloc(PyTypeObject *type)
 {
     PyHeapTypeObject *et;
+    PyObject *tp, *val, *tb;
 
     /* Assert this is a heap-allocated type object */
     assert(type->tp_flags & Py_TPFLAGS_HEAPTYPE);
     _PyObject_GC_UNTRACK(type);
+    PyErr_Fetch(&tp, &val, &tb);
+    remove_all_subclasses(type, type->tp_bases);
+    PyErr_Restore(tp, val, tb);
     PyObject_ClearWeakRefs((PyObject *)type);
     et = (PyHeapTypeObject *)type;
     Py_XDECREF(type->tp_base);
@@ -2723,7 +3010,7 @@ static PyObject *
 type_subclasses(PyTypeObject *type, PyObject *args_ignored)
 {
     PyObject *list, *raw, *ref;
-    Py_ssize_t i, n;
+    Py_ssize_t i;
 
     list = PyList_New(0);
     if (list == NULL)
@@ -2731,10 +3018,9 @@ type_subclasses(PyTypeObject *type, PyObject *args_ignored)
     raw = type->tp_subclasses;
     if (raw == NULL)
         return list;
-    assert(PyList_Check(raw));
-    n = PyList_GET_SIZE(raw);
-    for (i = 0; i < n; i++) {
-        ref = PyList_GET_ITEM(raw, i);
+    assert(PyDict_CheckExact(raw));
+    i = 0;
+    while (PyDict_Next(raw, &i, NULL, &ref)) {
         assert(PyWeakref_CheckRef(ref));
         ref = PyWeakref_GET_OBJECT(ref);
         if (ref != Py_None) {
@@ -2867,6 +3153,8 @@ static PyMethodDef type_methods[] = {
 };
 
 PyDoc_STRVAR(type_doc,
+/* this text signature cannot be accurate yet.  will fix.  --larry */
+"type(object_or_name, bases, dict)\n"
 "type(object) -> the object's type\n"
 "type(name, bases, dict) -> a new type");
 
@@ -2923,8 +3211,8 @@ type_clear(PyTypeObject *type)
            class's dict; the cycle will be broken that way.
 
        tp_subclasses:
-           A list of weak references can't be part of a cycle; and
-           lists have their own tp_clear.
+           A dict of weak references can't be part of a cycle; and
+           dicts have their own tp_clear.
 
        slots (in PyHeapTypeObject):
            A tuple of strings can't be part of a cycle.
@@ -3141,7 +3429,7 @@ object_repr(PyObject *self)
         Py_XDECREF(mod);
         return NULL;
     }
-    if (mod != NULL && PyUnicode_CompareWithASCIIString(mod, "builtins"))
+    if (mod != NULL && _PyUnicode_CompareWithId(mod, &PyId_builtins))
         rtn = PyUnicode_FromFormat("<%U.%U object at %p>", mod, name, self);
     else
         rtn = PyUnicode_FromFormat("<%s object at %p>",
@@ -3178,9 +3466,14 @@ object_richcompare(PyObject *self, PyObject *other, int op)
         break;
 
     case Py_NE:
-        /* By default, != returns the opposite of ==,
+        /* By default, __ne__() delegates to __eq__() and inverts the result,
            unless the latter returns NotImplemented. */
-        res = PyObject_RichCompare(self, other, Py_EQ);
+        if (self->ob_type->tp_richcompare == NULL) {
+            res = Py_NotImplemented;
+            Py_INCREF(res);
+            break;
+        }
+        res = (*self->ob_type->tp_richcompare)(self, other, Py_EQ);
         if (res != NULL && res != Py_NotImplemented) {
             int ok = PyObject_IsTrue(res);
             Py_DECREF(res);
@@ -3313,7 +3606,7 @@ object_set_class(PyObject *self, PyObject *value, void *closure)
                      "__class__ assignment: only for heap types");
         return -1;
     }
-    if (compatible_for_assignment(newto, oldto, "__class__")) {
+    if (compatible_for_assignment(oldto, newto, "__class__")) {
         Py_INCREF(newto);
         Py_TYPE(self) = newto;
         Py_DECREF(oldto);
@@ -3366,150 +3659,438 @@ import_copyreg(void)
     return PyImport_Import(copyreg_str);
 }
 
-static PyObject *
-slotnames(PyObject *cls)
+Py_LOCAL(PyObject *)
+_PyType_GetSlotNames(PyTypeObject *cls)
 {
-    PyObject *clsdict;
     PyObject *copyreg;
     PyObject *slotnames;
     _Py_IDENTIFIER(__slotnames__);
     _Py_IDENTIFIER(_slotnames);
 
-    clsdict = ((PyTypeObject *)cls)->tp_dict;
-    slotnames = _PyDict_GetItemId(clsdict, &PyId___slotnames__);
-    if (slotnames != NULL && PyList_Check(slotnames)) {
+    assert(PyType_Check(cls));
+
+    /* Get the slot names from the cache in the class if possible. */
+    slotnames = _PyDict_GetItemIdWithError(cls->tp_dict, &PyId___slotnames__);
+    if (slotnames != NULL) {
+        if (slotnames != Py_None && !PyList_Check(slotnames)) {
+            PyErr_Format(PyExc_TypeError,
+                         "%.200s.__slotnames__ should be a list or None, "
+                         "not %.200s",
+                         cls->tp_name, Py_TYPE(slotnames)->tp_name);
+            return NULL;
+        }
         Py_INCREF(slotnames);
         return slotnames;
     }
+    else {
+        if (PyErr_Occurred()) {
+            return NULL;
+        }
+        /* The class does not have the slot names cached yet. */
+    }
 
     copyreg = import_copyreg();
     if (copyreg == NULL)
         return NULL;
 
-    slotnames = _PyObject_CallMethodId(copyreg, &PyId__slotnames, "O", cls);
+    /* Use _slotnames function from the copyreg module to find the slots
+       by this class and its bases. This function will cache the result
+       in __slotnames__. */
+    slotnames = _PyObject_CallMethodIdObjArgs(copyreg, &PyId__slotnames,
+                                              cls, NULL);
     Py_DECREF(copyreg);
-    if (slotnames != NULL &&
-        slotnames != Py_None &&
-        !PyList_Check(slotnames))
-    {
+    if (slotnames == NULL)
+        return NULL;
+
+    if (slotnames != Py_None && !PyList_Check(slotnames)) {
         PyErr_SetString(PyExc_TypeError,
-            "copyreg._slotnames didn't return a list or None");
+                        "copyreg._slotnames didn't return a list or None");
         Py_DECREF(slotnames);
-        slotnames = NULL;
+        return NULL;
     }
 
     return slotnames;
 }
 
-static PyObject *
-reduce_2(PyObject *obj)
+Py_LOCAL(PyObject *)
+_PyObject_GetState(PyObject *obj)
 {
-    PyObject *cls, *getnewargs;
-    PyObject *args = NULL, *args2 = NULL;
-    PyObject *getstate = NULL, *state = NULL, *names = NULL;
-    PyObject *slots = NULL, *listitems = NULL, *dictitems = NULL;
-    PyObject *copyreg = NULL, *newobj = NULL, *res = NULL;
-    Py_ssize_t i, n;
-    _Py_IDENTIFIER(__getnewargs__);
+    PyObject *state;
+    PyObject *getstate;
     _Py_IDENTIFIER(__getstate__);
-    _Py_IDENTIFIER(__newobj__);
 
-    cls = (PyObject *) Py_TYPE(obj);
+    getstate = _PyObject_GetAttrId(obj, &PyId___getstate__);
+    if (getstate == NULL) {
+        PyObject *slotnames;
 
-    getnewargs = _PyObject_GetAttrId(obj, &PyId___getnewargs__);
-    if (getnewargs != NULL) {
-        args = PyObject_CallObject(getnewargs, NULL);
-        Py_DECREF(getnewargs);
-        if (args != NULL && !PyTuple_Check(args)) {
-            PyErr_Format(PyExc_TypeError,
-                "__getnewargs__ should return a tuple, "
-                "not '%.200s'", Py_TYPE(args)->tp_name);
-            goto end;
+        if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
+            return NULL;
         }
-    }
-    else {
         PyErr_Clear();
-        args = PyTuple_New(0);
-    }
-    if (args == NULL)
-        goto end;
 
-    getstate = _PyObject_GetAttrId(obj, &PyId___getstate__);
-    if (getstate != NULL) {
-        state = PyObject_CallObject(getstate, NULL);
-        Py_DECREF(getstate);
-        if (state == NULL)
-            goto end;
-    }
-    else {
-        PyObject **dict;
-        PyErr_Clear();
-        dict = _PyObject_GetDictPtr(obj);
-        if (dict && *dict)
-            state = *dict;
-        else
-            state = Py_None;
-        Py_INCREF(state);
-        names = slotnames(cls);
-        if (names == NULL)
-            goto end;
-        if (names != Py_None && PyList_GET_SIZE(names) > 0) {
-            assert(PyList_Check(names));
+        {
+            PyObject **dict;
+            dict = _PyObject_GetDictPtr(obj);
+            /* It is possible that the object's dict is not initialized
+               yet. In this case, we will return None for the state.
+               We also return None if the dict is empty to make the behavior
+               consistent regardless of whether the dict was initialized or
+               not. This makes unit testing easier. */
+            if (dict != NULL && *dict != NULL && PyDict_Size(*dict) > 0) {
+                state = *dict;
+            }
+            else {
+                state = Py_None;
+            }
+            Py_INCREF(state);
+        }
+
+        slotnames = _PyType_GetSlotNames(Py_TYPE(obj));
+        if (slotnames == NULL) {
+            Py_DECREF(state);
+            return NULL;
+        }
+
+        assert(slotnames == Py_None || PyList_Check(slotnames));
+        if (slotnames != Py_None && Py_SIZE(slotnames) > 0) {
+            PyObject *slots;
+            Py_ssize_t slotnames_size, i;
+
             slots = PyDict_New();
-            if (slots == NULL)
-                goto end;
-            n = 0;
-            /* Can't pre-compute the list size; the list
-               is stored on the class so accessible to other
-               threads, which may be run by DECREF */
-            for (i = 0; i < PyList_GET_SIZE(names); i++) {
+            if (slots == NULL) {
+                Py_DECREF(slotnames);
+                Py_DECREF(state);
+                return NULL;
+            }
+
+            slotnames_size = Py_SIZE(slotnames);
+            for (i = 0; i < slotnames_size; i++) {
                 PyObject *name, *value;
-                name = PyList_GET_ITEM(names, i);
+
+                name = PyList_GET_ITEM(slotnames, i);
+                Py_INCREF(name);
                 value = PyObject_GetAttr(obj, name);
-                if (value == NULL)
+                if (value == NULL) {
+                    Py_DECREF(name);
+                    if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
+                        goto error;
+                    }
+                    /* It is not an error if the attribute is not present. */
                     PyErr_Clear();
+                }
                 else {
-                    int err = PyDict_SetItem(slots, name,
-                                             value);
+                    int err = PyDict_SetItem(slots, name, value);
+                    Py_DECREF(name);
                     Py_DECREF(value);
-                    if (err)
-                        goto end;
-                    n++;
+                    if (err) {
+                        goto error;
+                    }
+                }
+
+                /* The list is stored on the class so it may mutate while we
+                   iterate over it */
+                if (slotnames_size != Py_SIZE(slotnames)) {
+                    PyErr_Format(PyExc_RuntimeError,
+                                 "__slotsname__ changed size during iteration");
+                    goto error;
+                }
+
+                /* We handle errors within the loop here. */
+                if (0) {
+                  error:
+                    Py_DECREF(slotnames);
+                    Py_DECREF(slots);
+                    Py_DECREF(state);
+                    return NULL;
                 }
             }
-            if (n) {
-                state = Py_BuildValue("(NO)", state, slots);
-                if (state == NULL)
-                    goto end;
+
+            /* If we found some slot attributes, pack them in a tuple along
+               with the original attribute dictionary. */
+            if (PyDict_Size(slots) > 0) {
+                PyObject *state2;
+
+                state2 = PyTuple_Pack(2, state, slots);
+                Py_DECREF(state);
+                if (state2 == NULL) {
+                    Py_DECREF(slotnames);
+                    Py_DECREF(slots);
+                    return NULL;
+                }
+                state = state2;
             }
+            Py_DECREF(slots);
         }
+        Py_DECREF(slotnames);
+    }
+    else { /* getstate != NULL */
+        state = PyObject_CallObject(getstate, NULL);
+        Py_DECREF(getstate);
+        if (state == NULL)
+            return NULL;
+    }
+
+    return state;
+}
+
+Py_LOCAL(int)  /* returns 0 on success, -1 on failure */
+_PyObject_GetNewArguments(PyObject *obj, PyObject **args, PyObject **kwargs)
+{
+    PyObject *getnewargs, *getnewargs_ex;
+    _Py_IDENTIFIER(__getnewargs_ex__);
+    _Py_IDENTIFIER(__getnewargs__);
+
+    if (args == NULL || kwargs == NULL) {  /* both output slots are required */
+        PyErr_BadInternalCall();
+        return -1;
+    }
+
+    /* We first attempt to fetch the arguments for __new__ by calling
+       __getnewargs_ex__ on the object; it must return (tuple, dict). */
+    getnewargs_ex = _PyObject_LookupSpecial(obj, &PyId___getnewargs_ex__);
+    if (getnewargs_ex != NULL) {
+        PyObject *newargs = PyObject_CallObject(getnewargs_ex, NULL);
+        Py_DECREF(getnewargs_ex);
+        if (newargs == NULL) {
+            return -1;
+        }
+        if (!PyTuple_Check(newargs)) {
+            PyErr_Format(PyExc_TypeError,
+                         "__getnewargs_ex__ should return a tuple, "
+                         "not '%.200s'", Py_TYPE(newargs)->tp_name);
+            Py_DECREF(newargs);
+            return -1;
+        }
+        if (Py_SIZE(newargs) != 2) {
+            PyErr_Format(PyExc_ValueError,
+                         "__getnewargs_ex__ should return a tuple of "
+                         "length 2, not %zd", Py_SIZE(newargs));
+            Py_DECREF(newargs);
+            return -1;
+        }
+        *args = PyTuple_GET_ITEM(newargs, 0);
+        Py_INCREF(*args);    /* must outlive the Py_DECREF(newargs) below */
+        *kwargs = PyTuple_GET_ITEM(newargs, 1);
+        Py_INCREF(*kwargs);  /* must outlive the Py_DECREF(newargs) below */
+        Py_DECREF(newargs);
+
+        /* XXX We should perhaps allow None to be passed here. */
+        if (!PyTuple_Check(*args)) {
+            PyErr_Format(PyExc_TypeError,
+                         "first item of the tuple returned by "
+                         "__getnewargs_ex__ must be a tuple, not '%.200s'",
+                         Py_TYPE(*args)->tp_name);
+            Py_CLEAR(*args);
+            Py_CLEAR(*kwargs);
+            return -1;
+        }
+        if (!PyDict_Check(*kwargs)) {
+            PyErr_Format(PyExc_TypeError,
+                         "second item of the tuple returned by "
+                         "__getnewargs_ex__ must be a dict, not '%.200s'",
+                         Py_TYPE(*kwargs)->tp_name);
+            Py_CLEAR(*args);
+            Py_CLEAR(*kwargs);
+            return -1;
+        }
+        return 0;
+    } else if (PyErr_Occurred()) {
+        return -1;
+    }
+
+    /* The object does not have __getnewargs_ex__ so we fall back on using
+       __getnewargs__ instead. */
+    getnewargs = _PyObject_LookupSpecial(obj, &PyId___getnewargs__);
+    if (getnewargs != NULL) {
+        *args = PyObject_CallObject(getnewargs, NULL);
+        Py_DECREF(getnewargs);
+        if (*args == NULL) {
+            return -1;
+        }
+        if (!PyTuple_Check(*args)) {
+            PyErr_Format(PyExc_TypeError,
+                         "__getnewargs__ should return a tuple, "
+                         "not '%.200s'", Py_TYPE(*args)->tp_name);
+            Py_CLEAR(*args);
+            return -1;
+        }
+        *kwargs = NULL;  /* __getnewargs__ supplies no keyword arguments */
+        return 0;
+    } else if (PyErr_Occurred()) {
+        return -1;
+    }
+
+    /* The object has neither __getnewargs_ex__ nor __getnewargs__. This may
+       mean that __new__ does not take any arguments on this object, or that
+       the object does not implement the reduce protocol for pickling or
+       copying.  Success is reported with both outputs set to NULL. */
+    *args = NULL;
+    *kwargs = NULL;
+    return 0;
+}
+
+Py_LOCAL(int)
+_PyObject_GetItemsIter(PyObject *obj, PyObject **listitems,
+                       PyObject **dictitems)
+{
+    if (listitems == NULL || dictitems == NULL) {
+        PyErr_BadInternalCall();
+        return -1;
     }
 
     if (!PyList_Check(obj)) {
-        listitems = Py_None;
-        Py_INCREF(listitems);
+        *listitems = Py_None;
+        Py_INCREF(*listitems);
     }
     else {
-        listitems = PyObject_GetIter(obj);
-        if (listitems == NULL)
-            goto end;
+        *listitems = PyObject_GetIter(obj);
+        if (*listitems == NULL)
+            return -1;
     }
 
     if (!PyDict_Check(obj)) {
-        dictitems = Py_None;
-        Py_INCREF(dictitems);
+        *dictitems = Py_None;
+        Py_INCREF(*dictitems);
     }
     else {
+        PyObject *items;
         _Py_IDENTIFIER(items);
-        PyObject *items = _PyObject_CallMethodId(obj, &PyId_items, "");
-        if (items == NULL)
-            goto end;
-        dictitems = PyObject_GetIter(items);
+
+        items = _PyObject_CallMethodIdObjArgs(obj, &PyId_items, NULL);
+        if (items == NULL) {
+            Py_CLEAR(*listitems);
+            return -1;
+        }
+        *dictitems = PyObject_GetIter(items);
         Py_DECREF(items);
-        if (dictitems == NULL)
-            goto end;
+        if (*dictitems == NULL) {
+            Py_CLEAR(*listitems);
+            return -1;
+        }
+    }
+
+    assert(*listitems != NULL && *dictitems != NULL);
+
+    return 0;
+}
+
+/* Build the result of _common_reduce() for protocol >= 4: the 5-tuple
+   (copyreg.__newobj_ex__, (type(obj), args, kwargs), state, listitems,
+   dictitems).  args and kwargs come from _PyObject_GetNewArguments() and
+   default to () and {} when it leaves them NULL.
+   Returns a new reference, or NULL on failure. */
+static PyObject *
+reduce_4(PyObject *obj)
+{
+    PyObject *args = NULL, *kwargs = NULL;
+    PyObject *copyreg;
+    PyObject *newobj = NULL, *newargs = NULL, *state = NULL;
+    PyObject *listitems = NULL, *dictitems = NULL;
+    PyObject *result = NULL;
+    _Py_IDENTIFIER(__newobj_ex__);
+
+    if (Py_TYPE(obj)->tp_new == NULL) {
+        PyErr_Format(PyExc_TypeError,
+                     "can't pickle %s objects",
+                     Py_TYPE(obj)->tp_name);
+        return NULL;
+    }
+    if (_PyObject_GetNewArguments(obj, &args, &kwargs) < 0) {
+        return NULL;
+    }
+    if (args == NULL) {
+        args = PyTuple_New(0);
+        if (args == NULL)
+            goto done;
+    }
+    if (kwargs == NULL) {
+        kwargs = PyDict_New();
+        /* Do not return directly here: args is an owned reference that
+           the cleanup code at `done` has to release. */
+        if (kwargs == NULL)
+            goto done;
+    }
+
+    copyreg = import_copyreg();
+    if (copyreg == NULL)
+        goto done;
+    newobj = _PyObject_GetAttrId(copyreg, &PyId___newobj_ex__);
+    Py_DECREF(copyreg);
+    if (newobj == NULL)
+        goto done;
+
+    /* PyTuple_Pack() takes its own references; ours are dropped at done. */
+    newargs = PyTuple_Pack(3, Py_TYPE(obj), args, kwargs);
+    if (newargs == NULL)
+        goto done;
+
+    state = _PyObject_GetState(obj);
+    if (state == NULL)
+        goto done;
+
+    if (_PyObject_GetItemsIter(obj, &listitems, &dictitems) < 0)
+        goto done;
+
+    result = PyTuple_Pack(5, newobj, newargs, state, listitems, dictitems);
+
+  done:
+    /* Each pointer is either NULL or an owned reference at this point. */
+    Py_XDECREF(args);
+    Py_XDECREF(kwargs);
+    Py_XDECREF(newobj);
+    Py_XDECREF(newargs);
+    Py_XDECREF(state);
+    Py_XDECREF(listitems);
+    Py_XDECREF(dictitems);
+    return result;
+}
+
+static PyObject *
+reduce_2(PyObject *obj)
+{
+    PyObject *cls;
+    PyObject *args = NULL, *args2 = NULL, *kwargs = NULL;
+    PyObject *state = NULL, *listitems = NULL, *dictitems = NULL;
+    PyObject *copyreg = NULL, *newobj = NULL, *res = NULL;
+    Py_ssize_t i, n;
+    _Py_IDENTIFIER(__newobj__);
+
+    if (Py_TYPE(obj)->tp_new == NULL) {
+        PyErr_Format(PyExc_TypeError,
+                     "can't pickle %s objects",
+                     Py_TYPE(obj)->tp_name);
+        return NULL;
+    }
+    if (_PyObject_GetNewArguments(obj, &args, &kwargs) < 0) {
+        return NULL;
+    }
+    if (args == NULL) {
+        assert(kwargs == NULL);
+        args = PyTuple_New(0);
+        if (args == NULL) {
+            return NULL;
+        }
+    }
+    else if (kwargs != NULL) {
+        if (PyDict_Size(kwargs) > 0) {
+            PyErr_SetString(PyExc_ValueError,
+                            "must use protocol 4 or greater to copy this "
+                            "object; since __getnewargs_ex__ returned "
+                            "keyword arguments.");
+            Py_DECREF(args);
+            Py_DECREF(kwargs);
+            return NULL;
+        }
+        Py_CLEAR(kwargs);
     }
 
+    state = _PyObject_GetState(obj);
+    if (state == NULL)
+        goto end;
+
+    if (_PyObject_GetItemsIter(obj, &listitems, &dictitems) < 0)
+        goto end;
+
     copyreg = import_copyreg();
     if (copyreg == NULL)
         goto end;
@@ -3521,6 +4102,7 @@ reduce_2(PyObject *obj)
     args2 = PyTuple_New(n+1);
     if (args2 == NULL)
         goto end;
+    cls = (PyObject *) Py_TYPE(obj);
     Py_INCREF(cls);
     PyTuple_SET_ITEM(args2, 0, cls);
     for (i = 0; i < n; i++) {
@@ -3534,9 +4116,7 @@ reduce_2(PyObject *obj)
   end:
     Py_XDECREF(args);
     Py_XDECREF(args2);
-    Py_XDECREF(slots);
     Py_XDECREF(state);
-    Py_XDECREF(names);
     Py_XDECREF(listitems);
     Py_XDECREF(dictitems);
     Py_XDECREF(copyreg);
@@ -3564,7 +4144,9 @@ _common_reduce(PyObject *self, int proto)
 {
     PyObject *copyreg, *res;
 
-    if (proto >= 2)
+    if (proto >= 4)
+        return reduce_4(self);
+    else if (proto >= 2)
         return reduce_2(self);
 
     copyreg = import_copyreg();
@@ -3652,7 +4234,7 @@ PyDoc_STRVAR(object_subclasshook_doc,
 
    class object:
        def __format__(self, format_spec):
-       return format(str(self), format_spec)
+           return format(str(self), format_spec)
 */
 static PyObject *
 object_format(PyObject *self, PyObject *args)
@@ -3669,16 +4251,9 @@ object_format(PyObject *self, PyObject *args)
         /* Issue 7994: If we're converting to a string, we
            should reject format specifications */
         if (PyUnicode_GET_LENGTH(format_spec) > 0) {
-            if (PyErr_WarnEx(PyExc_DeprecationWarning,
-                             "object.__format__ with a non-empty format "
-                             "string is deprecated", 1) < 0) {
-              goto done;
-            }
-            /* Eventually this will become an error:
-               PyErr_Format(PyExc_TypeError,
+            PyErr_SetString(PyExc_TypeError,
                "non-empty format string passed to object.__format__");
-               goto done;
-            */
+            goto done;
         }
 
         result = PyObject_Format(self_as_str, format_spec);
@@ -3698,7 +4273,7 @@ object_sizeof(PyObject *self, PyObject *args)
     res = 0;
     isize = self->ob_type->tp_itemsize;
     if (isize > 0)
-        res = Py_SIZE(self->ob_type) * isize;
+        res = Py_SIZE(self) * isize;
     res += self->ob_type->tp_basicsize;
 
     return PyLong_FromSsize_t(res);
@@ -3788,8 +4363,8 @@ PyTypeObject PyBaseObject_Type = {
     PyObject_GenericGetAttr,                    /* tp_getattro */
     PyObject_GenericSetAttr,                    /* tp_setattro */
     0,                                          /* tp_as_buffer */
-    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
-    PyDoc_STR("The most base type"),            /* tp_doc */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,   /* tp_flags */
+    PyDoc_STR("object()\n--\n\nThe most base type"),  /* tp_doc */
     0,                                          /* tp_traverse */
     0,                                          /* tp_clear */
     object_richcompare,                         /* tp_richcompare */
@@ -3833,7 +4408,7 @@ add_methods(PyTypeObject *type, PyMethodDef *meth)
             descr = PyDescr_NewClassMethod(type, meth);
         }
         else if (meth->ml_flags & METH_STATIC) {
-            PyObject *cfunc = PyCFunction_New(meth, (PyObject*)type);
+          PyObject *cfunc = PyCFunction_NewEx(meth, (PyObject*)type, NULL);
             if (cfunc == NULL)
                 return -1;
             descr = PyStaticMethod_New(cfunc);
@@ -4103,6 +4678,10 @@ inherit_slots(PyTypeObject *type, PyTypeObject *base)
         COPYSLOT(tp_init);
         COPYSLOT(tp_alloc);
         COPYSLOT(tp_is_gc);
+        if ((type->tp_flags & Py_TPFLAGS_HAVE_FINALIZE) &&
+            (base->tp_flags & Py_TPFLAGS_HAVE_FINALIZE)) {
+            COPYSLOT(tp_finalize);
+        }
         if ((type->tp_flags & Py_TPFLAGS_HAVE_GC) ==
             (base->tp_flags & Py_TPFLAGS_HAVE_GC)) {
             /* They agree about gc. */
@@ -4215,9 +4794,8 @@ PyType_Ready(PyTypeObject *type)
     }
 
     /* Calculate method resolution order */
-    if (mro_internal(type) < 0) {
+    if (mro_internal(type, NULL) < 0)
         goto error;
-    }
 
     /* Inherit special flags from dominant base */
     if (type->tp_base != NULL)
@@ -4234,6 +4812,23 @@ PyType_Ready(PyTypeObject *type)
             inherit_slots(type, (PyTypeObject *)b);
     }
 
+    /* All bases of statically allocated type should be statically allocated */
+    if (!(type->tp_flags & Py_TPFLAGS_HEAPTYPE))
+        for (i = 0; i < n; i++) {
+            PyObject *b = PyTuple_GET_ITEM(bases, i);
+            if (PyType_Check(b) &&
+                (((PyTypeObject *)b)->tp_flags & Py_TPFLAGS_HEAPTYPE)) {
+                char buf[300];
+                PyOS_snprintf(buf, sizeof(buf),
+                              "type '%.100s' is not dynamically allocated but "
+                              "its base type '%.100s' is dynamically allocated",
+                              type->tp_name, ((PyTypeObject *)b)->tp_name);
+                if (PyErr_Warn(PyExc_DeprecationWarning, buf) < 0)
+                    goto error;
+                break;
+            }
+        }
+
     /* Sanity check for tp_free. */
     if (PyType_IS_GC(type) && (type->tp_flags & Py_TPFLAGS_BASETYPE) &&
         (type->tp_free == NULL || type->tp_free == PyObject_Del)) {
@@ -4252,14 +4847,20 @@ PyType_Ready(PyTypeObject *type)
      */
     if (_PyDict_GetItemId(type->tp_dict, &PyId___doc__) == NULL) {
         if (type->tp_doc != NULL) {
-            PyObject *doc = PyUnicode_FromString(type->tp_doc);
+            const char *old_doc = _PyType_DocWithoutSignature(type->tp_name,
+                type->tp_doc);
+            PyObject *doc = PyUnicode_FromString(old_doc);
             if (doc == NULL)
                 goto error;
-            _PyDict_SetItemId(type->tp_dict, &PyId___doc__, doc);
+            if (_PyDict_SetItemId(type->tp_dict, &PyId___doc__, doc) < 0) {
+                Py_DECREF(doc);
+                goto error;
+            }
             Py_DECREF(doc);
         } else {
-            _PyDict_SetItemId(type->tp_dict,
-                              &PyId___doc__, Py_None);
+            if (_PyDict_SetItemId(type->tp_dict,
+                                  &PyId___doc__, Py_None) < 0)
+                goto error;
         }
     }
 
@@ -4303,13 +4904,11 @@ PyType_Ready(PyTypeObject *type)
     /* Warn for a type that implements tp_compare (now known as
        tp_reserved) but not tp_richcompare. */
     if (type->tp_reserved && !type->tp_richcompare) {
-        int error;
-        error = PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+        PyErr_Format(PyExc_TypeError,
             "Type %.100s defines tp_reserved (formerly tp_compare) "
             "but not tp_richcompare. Comparisons may not behave as intended.",
             type->tp_name);
-        if (error == -1)
-            goto error;
+        goto error;
     }
 
     /* All done -- set the ready flag */
@@ -4326,49 +4925,75 @@ PyType_Ready(PyTypeObject *type)
 static int
 add_subclass(PyTypeObject *base, PyTypeObject *type)
 {
-    Py_ssize_t i;
-    int result;
-    PyObject *list, *ref, *newobj;
+    int result = -1;
+    PyObject *dict, *key, *newobj;
 
-    list = base->tp_subclasses;
-    if (list == NULL) {
-        base->tp_subclasses = list = PyList_New(0);
-        if (list == NULL)
+    dict = base->tp_subclasses;
+    if (dict == NULL) {
+        base->tp_subclasses = dict = PyDict_New();
+        if (dict == NULL)
             return -1;
     }
-    assert(PyList_Check(list));
+    assert(PyDict_CheckExact(dict));
+    key = PyLong_FromVoidPtr((void *) type);
+    if (key == NULL)
+        return -1;
     newobj = PyWeakref_NewRef((PyObject *)type, NULL);
-    i = PyList_GET_SIZE(list);
-    while (--i >= 0) {
-        ref = PyList_GET_ITEM(list, i);
-        assert(PyWeakref_CheckRef(ref));
-        if (PyWeakref_GET_OBJECT(ref) == Py_None)
-            return PyList_SetItem(list, i, newobj);
+    if (newobj != NULL) {
+        result = PyDict_SetItem(dict, key, newobj);
+        Py_DECREF(newobj);
     }
-    result = PyList_Append(list, newobj);
-    Py_DECREF(newobj);
+    Py_DECREF(key);
     return result;
 }
 
+/* Register `type` as a subclass of every type object in `bases`; returns 0,
+   or -1 if any add_subclass() call failed (the remaining bases still run). */
+static int
+add_all_subclasses(PyTypeObject *type, PyObject *bases)
+{
+    Py_ssize_t pos;
+    int status = 0;
+
+    for (pos = 0; bases != NULL && pos < PyTuple_GET_SIZE(bases); pos++) {
+        PyObject *candidate = PyTuple_GET_ITEM(bases, pos);
+        if (!PyType_Check(candidate))
+            continue;
+        if (add_subclass((PyTypeObject*)candidate, type) < 0)
+            status = -1;
+    }
+    return status;
+}
+
 static void
 remove_subclass(PyTypeObject *base, PyTypeObject *type)
 {
-    Py_ssize_t i;
-    PyObject *list, *ref;
+    PyObject *dict, *key;
 
-    list = base->tp_subclasses;
-    if (list == NULL) {
+    dict = base->tp_subclasses;
+    if (dict == NULL) {
         return;
     }
-    assert(PyList_Check(list));
-    i = PyList_GET_SIZE(list);
-    while (--i >= 0) {
-        ref = PyList_GET_ITEM(list, i);
-        assert(PyWeakref_CheckRef(ref));
-        if (PyWeakref_GET_OBJECT(ref) == (PyObject*)type) {
-            /* this can't fail, right? */
-            PySequence_DelItem(list, i);
-            return;
+    assert(PyDict_CheckExact(dict));
+    key = PyLong_FromVoidPtr((void *) type);
+    if (key == NULL || PyDict_DelItem(dict, key)) {
+        /* This can happen if the type initialization errored out before
+           the base subclasses were updated (e.g. a non-str __qualname__
+           was passed in the type dict). */
+        PyErr_Clear();
+    }
+    Py_XDECREF(key);
+}
+
+static void
+remove_all_subclasses(PyTypeObject *type, PyObject *bases)
+{
+    if (bases) {
+        Py_ssize_t i;
+        for (i = 0; i < PyTuple_GET_SIZE(bases); i++) {
+            PyObject *base = PyTuple_GET_ITEM(bases, i);
+            if (PyType_Check(base))
+                remove_subclass((PyTypeObject*) base, type);
         }
     }
 }
@@ -4725,6 +5350,18 @@ wrap_call(PyObject *self, PyObject *args, void *wrapped, PyObject *kwds)
 }
 
 static PyObject *
+wrap_del(PyObject *self, PyObject *args, void *wrapped)
+{
+    destructor func = (destructor)wrapped;
+
+    if (!check_num_args(args, 0))
+        return NULL;
+
+    (*func)(self);
+    Py_RETURN_NONE;
+}
+
+static PyObject *
 wrap_richcmpfunc(PyObject *self, PyObject *args, void *wrapped, int op)
 {
     richcmpfunc func = (richcmpfunc)wrapped;
@@ -4877,7 +5514,7 @@ tp_new_wrapper(PyObject *self, PyObject *args, PyObject *kwds)
                      "%s.__new__(%s) is not safe, use %s.__new__()",
                      type->tp_name,
                      subtype->tp_name,
-                     staticbase == NULL ? "?" : staticbase->tp_name);
+                     staticbase->tp_name);
         return NULL;
     }
 
@@ -4891,8 +5528,9 @@ tp_new_wrapper(PyObject *self, PyObject *args, PyObject *kwds)
 
 static struct PyMethodDef tp_new_methoddef[] = {
     {"__new__", (PyCFunction)tp_new_wrapper, METH_VARARGS|METH_KEYWORDS,
-     PyDoc_STR("T.__new__(S, ...) -> "
-               "a new object with type S, a subtype of T")},
+     PyDoc_STR("__new__($type, *args, **kwargs)\n--\n\n"
+               "Create and return a new object.  "
+               "See help(type) for accurate signature.")},
     {0}
 };
 
@@ -4903,7 +5541,7 @@ add_tp_new_wrapper(PyTypeObject *type)
 
     if (_PyDict_GetItemId(type->tp_dict, &PyId___new__) != NULL)
         return 0;
-    func = PyCFunction_New(tp_new_methoddef, (PyObject *)type);
+    func = PyCFunction_NewEx(tp_new_methoddef, (PyObject *)type, NULL);
     if (func == NULL)
         return -1;
     if (_PyDict_SetItemId(type->tp_dict, &PyId___new__, func)) {
@@ -5015,7 +5653,6 @@ FUNCNAME(PyObject *self, ARG1TYPE arg1, ARG2TYPE arg2) \
 static Py_ssize_t
 slot_sq_length(PyObject *self)
 {
-    _Py_IDENTIFIER(__len__);
     PyObject *res = call_method(self, &PyId___len__, "()");
     Py_ssize_t len;
 
@@ -5076,8 +5713,6 @@ static int
 slot_sq_ass_item(PyObject *self, Py_ssize_t index, PyObject *value)
 {
     PyObject *res;
-    _Py_IDENTIFIER(__delitem__);
-    _Py_IDENTIFIER(__setitem__);
 
     if (value == NULL)
         res = call_method(self, &PyId___delitem__, "(n)", index);
@@ -5127,8 +5762,6 @@ static int
 slot_mp_ass_subscript(PyObject *self, PyObject *key, PyObject *value)
 {
     PyObject *res;
-    _Py_IDENTIFIER(__delitem__);
-    _Py_IDENTIFIER(__setitem__);
 
     if (value == NULL)
         res = call_method(self, &PyId___delitem__, "(O)", key);
@@ -5179,7 +5812,6 @@ slot_nb_bool(PyObject *self)
     PyObject *func, *args;
     int result = -1;
     int using_len = 0;
-    _Py_IDENTIFIER(__len__);
     _Py_IDENTIFIER(__bool__);
 
     func = lookup_maybe(self, &PyId___bool__);
@@ -5281,28 +5913,11 @@ slot_tp_str(PyObject *self)
     _Py_IDENTIFIER(__str__);
 
     func = lookup_method(self, &PyId___str__);
-    if (func != NULL) {
-        res = PyEval_CallObject(func, NULL);
-        Py_DECREF(func);
-        return res;
-    }
-    else {
-        /* PyObject *ress; */
-        PyErr_Clear();
-        res = slot_tp_repr(self);
-        if (!res)
-            return NULL;
-        /* XXX this is non-sensical. Why should we return
-           a bytes object from __str__. Is this code even
-           used? - mvl */
-        assert(0);
-        return res;
-        /*
-        ress = _PyUnicode_AsDefaultEncodedString(res);
-        Py_DECREF(res);
-        return ress;
-        */
-    }
+    if (func == NULL)
+        return NULL;
+    res = PyEval_CallObject(func, NULL);
+    Py_DECREF(func);
+    return res;
 }
 
 static Py_hash_t
@@ -5598,7 +6213,6 @@ slot_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     PyObject *func;
     PyObject *newargs, *x;
     Py_ssize_t i, n;
-    _Py_IDENTIFIER(__new__);
 
     func = _PyObject_GetAttrId((PyObject *)type, &PyId___new__);
     if (func == NULL)
@@ -5622,16 +6236,12 @@ slot_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 }
 
 static void
-slot_tp_del(PyObject *self)
+slot_tp_finalize(PyObject *self)
 {
     _Py_IDENTIFIER(__del__);
     PyObject *del, *res;
     PyObject *error_type, *error_value, *error_traceback;
 
-    /* Temporarily resurrect the object. */
-    assert(self->ob_refcnt == 0);
-    self->ob_refcnt = 1;
-
     /* Save the current exception, if any. */
     PyErr_Fetch(&error_type, &error_value, &error_traceback);
 
@@ -5648,37 +6258,6 @@ slot_tp_del(PyObject *self)
 
     /* Restore the saved exception. */
     PyErr_Restore(error_type, error_value, error_traceback);
-
-    /* Undo the temporary resurrection; can't use DECREF here, it would
-     * cause a recursive call.
-     */
-    assert(self->ob_refcnt > 0);
-    if (--self->ob_refcnt == 0)
-        return;         /* this is the normal path out */
-
-    /* __del__ resurrected it!  Make it look like the original Py_DECREF
-     * never happened.
-     */
-    {
-        Py_ssize_t refcnt = self->ob_refcnt;
-        _Py_NewReference(self);
-        self->ob_refcnt = refcnt;
-    }
-    assert(!PyType_IS_GC(Py_TYPE(self)) ||
-           _Py_AS_GC(self)->gc.gc_refs != _PyGC_REFS_UNTRACKED);
-    /* If Py_REF_DEBUG, _Py_NewReference bumped _Py_RefTotal, so
-     * we need to undo that. */
-    _Py_DEC_REFTOTAL;
-    /* If Py_TRACE_REFS, _Py_NewReference re-added self to the object
-     * chain, so no more to do there.
-     * If COUNT_ALLOCS, the original decref bumped tp_frees, and
-     * _Py_NewReference bumped tp_allocs:  both of those need to be
-     * undone.
-     */
-#ifdef COUNT_ALLOCS
-    --Py_TYPE(self)->tp_frees;
-    --Py_TYPE(self)->tp_allocs;
-#endif
 }
 
 
@@ -5723,22 +6302,22 @@ typedef struct wrapperbase slotdef;
     ETSLOT(NAME, as_number.SLOT, FUNCTION, WRAPPER, DOC)
 #define UNSLOT(NAME, SLOT, FUNCTION, WRAPPER, DOC) \
     ETSLOT(NAME, as_number.SLOT, FUNCTION, WRAPPER, \
-           "x." NAME "() <==> " DOC)
+           NAME "($self, /)\n--\n\n" DOC)
 #define IBSLOT(NAME, SLOT, FUNCTION, WRAPPER, DOC) \
     ETSLOT(NAME, as_number.SLOT, FUNCTION, WRAPPER, \
-           "x." NAME "(y) <==> x" DOC "y")
+           NAME "($self, value, /)\n--\n\nReturn self" DOC "value.")
 #define BINSLOT(NAME, SLOT, FUNCTION, DOC) \
     ETSLOT(NAME, as_number.SLOT, FUNCTION, wrap_binaryfunc_l, \
-           "x." NAME "(y) <==> x" DOC "y")
+           NAME "($self, value, /)\n--\n\nReturn self" DOC "value.")
 #define RBINSLOT(NAME, SLOT, FUNCTION, DOC) \
     ETSLOT(NAME, as_number.SLOT, FUNCTION, wrap_binaryfunc_r, \
-           "x." NAME "(y) <==> y" DOC "x")
+           NAME "($self, value, /)\n--\n\nReturn value" DOC "self.")
 #define BINSLOTNOTINFIX(NAME, SLOT, FUNCTION, DOC) \
     ETSLOT(NAME, as_number.SLOT, FUNCTION, wrap_binaryfunc_l, \
-           "x." NAME "(y) <==> " DOC)
+           NAME "($self, value, /)\n--\n\n" DOC)
 #define RBINSLOTNOTINFIX(NAME, SLOT, FUNCTION, DOC) \
     ETSLOT(NAME, as_number.SLOT, FUNCTION, wrap_binaryfunc_r, \
-           "x." NAME "(y) <==> " DOC)
+           NAME "($self, value, /)\n--\n\n" DOC)
 
 static slotdef slotdefs[] = {
     TPSLOT("__getattribute__", tp_getattr, NULL, NULL, ""),
@@ -5746,80 +6325,85 @@ static slotdef slotdefs[] = {
     TPSLOT("__setattr__", tp_setattr, NULL, NULL, ""),
     TPSLOT("__delattr__", tp_setattr, NULL, NULL, ""),
     TPSLOT("__repr__", tp_repr, slot_tp_repr, wrap_unaryfunc,
-           "x.__repr__() <==> repr(x)"),
+           "__repr__($self, /)\n--\n\nReturn repr(self)."),
     TPSLOT("__hash__", tp_hash, slot_tp_hash, wrap_hashfunc,
-           "x.__hash__() <==> hash(x)"),
+           "__hash__($self, /)\n--\n\nReturn hash(self)."),
     FLSLOT("__call__", tp_call, slot_tp_call, (wrapperfunc)wrap_call,
-           "x.__call__(...) <==> x(...)", PyWrapperFlag_KEYWORDS),
+           "__call__($self, /, *args, **kwargs)\n--\n\nCall self as a function.",
+           PyWrapperFlag_KEYWORDS),
     TPSLOT("__str__", tp_str, slot_tp_str, wrap_unaryfunc,
-           "x.__str__() <==> str(x)"),
+           "__str__($self, /)\n--\n\nReturn str(self)."),
     TPSLOT("__getattribute__", tp_getattro, slot_tp_getattr_hook,
-           wrap_binaryfunc, "x.__getattribute__('name') <==> x.name"),
+           wrap_binaryfunc,
+           "__getattribute__($self, name, /)\n--\n\nReturn getattr(self, name)."),
     TPSLOT("__getattr__", tp_getattro, slot_tp_getattr_hook, NULL, ""),
     TPSLOT("__setattr__", tp_setattro, slot_tp_setattro, wrap_setattr,
-           "x.__setattr__('name', value) <==> x.name = value"),
+           "__setattr__($self, name, value, /)\n--\n\nImplement setattr(self, name, value)."),
     TPSLOT("__delattr__", tp_setattro, slot_tp_setattro, wrap_delattr,
-           "x.__delattr__('name') <==> del x.name"),
+           "__delattr__($self, name, /)\n--\n\nImplement delattr(self, name)."),
     TPSLOT("__lt__", tp_richcompare, slot_tp_richcompare, richcmp_lt,
-           "x.__lt__(y) <==> x<y"),
+           "__lt__($self, value, /)\n--\n\nReturn self<value."),
     TPSLOT("__le__", tp_richcompare, slot_tp_richcompare, richcmp_le,
-           "x.__le__(y) <==> x<=y"),
+           "__le__($self, value, /)\n--\n\nReturn self<=value."),
     TPSLOT("__eq__", tp_richcompare, slot_tp_richcompare, richcmp_eq,
-           "x.__eq__(y) <==> x==y"),
+           "__eq__($self, value, /)\n--\n\nReturn self==value."),
     TPSLOT("__ne__", tp_richcompare, slot_tp_richcompare, richcmp_ne,
-           "x.__ne__(y) <==> x!=y"),
+           "__ne__($self, value, /)\n--\n\nReturn self!=value."),
     TPSLOT("__gt__", tp_richcompare, slot_tp_richcompare, richcmp_gt,
-           "x.__gt__(y) <==> x>y"),
+           "__gt__($self, value, /)\n--\n\nReturn self>value."),
     TPSLOT("__ge__", tp_richcompare, slot_tp_richcompare, richcmp_ge,
-           "x.__ge__(y) <==> x>=y"),
+           "__ge__($self, value, /)\n--\n\nReturn self>=value."),
     TPSLOT("__iter__", tp_iter, slot_tp_iter, wrap_unaryfunc,
-           "x.__iter__() <==> iter(x)"),
+           "__iter__($self, /)\n--\n\nImplement iter(self)."),
     TPSLOT("__next__", tp_iternext, slot_tp_iternext, wrap_next,
-           "x.__next__() <==> next(x)"),
+           "__next__($self, /)\n--\n\nImplement next(self)."),
     TPSLOT("__get__", tp_descr_get, slot_tp_descr_get, wrap_descr_get,
-           "descr.__get__(obj[, type]) -> value"),
+           "__get__($self, instance, owner, /)\n--\n\nReturn an attribute of instance, which is of type owner."),
     TPSLOT("__set__", tp_descr_set, slot_tp_descr_set, wrap_descr_set,
-           "descr.__set__(obj, value)"),
+           "__set__($self, instance, value, /)\n--\n\nSet an attribute of instance to value."),
     TPSLOT("__delete__", tp_descr_set, slot_tp_descr_set,
-           wrap_descr_delete, "descr.__delete__(obj)"),
+           wrap_descr_delete,
+           "__delete__($self, instance, /)\n--\n\nDelete an attribute of instance."),
     FLSLOT("__init__", tp_init, slot_tp_init, (wrapperfunc)wrap_init,
-           "x.__init__(...) initializes x; "
-           "see help(type(x)) for signature",
+           "__init__($self, /, *args, **kwargs)\n--\n\n"
+           "Initialize self.  See help(type(self)) for accurate signature.",
            PyWrapperFlag_KEYWORDS),
-    TPSLOT("__new__", tp_new, slot_tp_new, NULL, ""),
-    TPSLOT("__del__", tp_del, slot_tp_del, NULL, ""),
+    TPSLOT("__new__", tp_new, slot_tp_new, NULL,
+           "__new__(type, /, *args, **kwargs)\n--\n\n"
+           "Create and return new object.  See help(type) for accurate signature."),
+    TPSLOT("__del__", tp_finalize, slot_tp_finalize, (wrapperfunc)wrap_del, ""),
 
     BINSLOT("__add__", nb_add, slot_nb_add,
-        "+"),
+           "+"),
     RBINSLOT("__radd__", nb_add, slot_nb_add,
-             "+"),
+           "+"),
     BINSLOT("__sub__", nb_subtract, slot_nb_subtract,
-        "-"),
+           "-"),
     RBINSLOT("__rsub__", nb_subtract, slot_nb_subtract,
-             "-"),
+           "-"),
     BINSLOT("__mul__", nb_multiply, slot_nb_multiply,
-        "*"),
+           "*"),
     RBINSLOT("__rmul__", nb_multiply, slot_nb_multiply,
-             "*"),
+           "*"),
     BINSLOT("__mod__", nb_remainder, slot_nb_remainder,
-        "%"),
+           "%"),
     RBINSLOT("__rmod__", nb_remainder, slot_nb_remainder,
-             "%"),
+           "%"),
     BINSLOTNOTINFIX("__divmod__", nb_divmod, slot_nb_divmod,
-        "divmod(x, y)"),
+           "Return divmod(self, value)."),
     RBINSLOTNOTINFIX("__rdivmod__", nb_divmod, slot_nb_divmod,
-             "divmod(y, x)"),
+           "Return divmod(value, self)."),
     NBSLOT("__pow__", nb_power, slot_nb_power, wrap_ternaryfunc,
-           "x.__pow__(y[, z]) <==> pow(x, y[, z])"),
+           "__pow__($self, value, mod=None, /)\n--\n\nReturn pow(self, value, mod)."),
     NBSLOT("__rpow__", nb_power, slot_nb_power, wrap_ternaryfunc_r,
-           "y.__rpow__(x[, z]) <==> pow(x, y[, z])"),
-    UNSLOT("__neg__", nb_negative, slot_nb_negative, wrap_unaryfunc, "-x"),
-    UNSLOT("__pos__", nb_positive, slot_nb_positive, wrap_unaryfunc, "+x"),
+           "__rpow__($self, value, mod=None, /)\n--\n\nReturn pow(value, self, mod)."),
+    UNSLOT("__neg__", nb_negative, slot_nb_negative, wrap_unaryfunc, "-self"),
+    UNSLOT("__pos__", nb_positive, slot_nb_positive, wrap_unaryfunc, "+self"),
     UNSLOT("__abs__", nb_absolute, slot_nb_absolute, wrap_unaryfunc,
-           "abs(x)"),
+           "abs(self)"),
     UNSLOT("__bool__", nb_bool, slot_nb_bool, wrap_inquirypred,
-           "x != 0"),
-    UNSLOT("__invert__", nb_invert, slot_nb_invert, wrap_unaryfunc, "~x"),
+           "self != 0"),
+    UNSLOT("__invert__", nb_invert, slot_nb_invert, wrap_unaryfunc, "~self"),
     BINSLOT("__lshift__", nb_lshift, slot_nb_lshift, "<<"),
     RBINSLOT("__rlshift__", nb_lshift, slot_nb_lshift, "<<"),
     BINSLOT("__rshift__", nb_rshift, slot_nb_rshift, ">>"),
@@ -5831,9 +6415,9 @@ static slotdef slotdefs[] = {
     BINSLOT("__or__", nb_or, slot_nb_or, "|"),
     RBINSLOT("__ror__", nb_or, slot_nb_or, "|"),
     UNSLOT("__int__", nb_int, slot_nb_int, wrap_unaryfunc,
-           "int(x)"),
+           "int(self)"),
     UNSLOT("__float__", nb_float, slot_nb_float, wrap_unaryfunc,
-           "float(x)"),
+           "float(self)"),
     IBSLOT("__iadd__", nb_inplace_add, slot_nb_inplace_add,
            wrap_binaryfunc, "+="),
     IBSLOT("__isub__", nb_inplace_subtract, slot_nb_inplace_subtract,
@@ -5859,49 +6443,52 @@ static slotdef slotdefs[] = {
     BINSLOT("__truediv__", nb_true_divide, slot_nb_true_divide, "/"),
     RBINSLOT("__rtruediv__", nb_true_divide, slot_nb_true_divide, "/"),
     IBSLOT("__ifloordiv__", nb_inplace_floor_divide,
-           slot_nb_inplace_floor_divide, wrap_binaryfunc, "//"),
+           slot_nb_inplace_floor_divide, wrap_binaryfunc, "//="),
     IBSLOT("__itruediv__", nb_inplace_true_divide,
-           slot_nb_inplace_true_divide, wrap_binaryfunc, "/"),
+           slot_nb_inplace_true_divide, wrap_binaryfunc, "/="),
     NBSLOT("__index__", nb_index, slot_nb_index, wrap_unaryfunc,
-           "x[y:z] <==> x[y.__index__():z.__index__()]"),
-
+           "__index__($self, /)\n--\n\n"
+           "Return self converted to an integer, if self is suitable "
+           "for use as an index into a list."),
     MPSLOT("__len__", mp_length, slot_mp_length, wrap_lenfunc,
-           "x.__len__() <==> len(x)"),
+           "__len__($self, /)\n--\n\nReturn len(self)."),
     MPSLOT("__getitem__", mp_subscript, slot_mp_subscript,
            wrap_binaryfunc,
-           "x.__getitem__(y) <==> x[y]"),
+           "__getitem__($self, key, /)\n--\n\nReturn self[key]."),
     MPSLOT("__setitem__", mp_ass_subscript, slot_mp_ass_subscript,
            wrap_objobjargproc,
-           "x.__setitem__(i, y) <==> x[i]=y"),
+           "__setitem__($self, key, value, /)\n--\n\nSet self[key] to value."),
     MPSLOT("__delitem__", mp_ass_subscript, slot_mp_ass_subscript,
            wrap_delitem,
-           "x.__delitem__(y) <==> del x[y]"),
+           "__delitem__($self, key, /)\n--\n\nDelete self[key]."),
 
     SQSLOT("__len__", sq_length, slot_sq_length, wrap_lenfunc,
-           "x.__len__() <==> len(x)"),
+           "__len__($self, /)\n--\n\nReturn len(self)."),
     /* Heap types defining __add__/__mul__ have sq_concat/sq_repeat == NULL.
        The logic in abstract.c always falls back to nb_add/nb_multiply in
        this case.  Defining both the nb_* and the sq_* slots to call the
        user-defined methods has unexpected side-effects, as shown by
        test_descr.notimplemented() */
     SQSLOT("__add__", sq_concat, NULL, wrap_binaryfunc,
-      "x.__add__(y) <==> x+y"),
+           "__add__($self, value, /)\n--\n\nReturn self+value."),
     SQSLOT("__mul__", sq_repeat, NULL, wrap_indexargfunc,
-      "x.__mul__(n) <==> x*n"),
+           "__mul__($self, value, /)\n--\n\nReturn self*value."),
     SQSLOT("__rmul__", sq_repeat, NULL, wrap_indexargfunc,
-      "x.__rmul__(n) <==> n*x"),
+           "__rmul__($self, value, /)\n--\n\nReturn value*self."),
     SQSLOT("__getitem__", sq_item, slot_sq_item, wrap_sq_item,
-           "x.__getitem__(y) <==> x[y]"),
+           "__getitem__($self, key, /)\n--\n\nReturn self[key]."),
     SQSLOT("__setitem__", sq_ass_item, slot_sq_ass_item, wrap_sq_setitem,
-           "x.__setitem__(i, y) <==> x[i]=y"),
+           "__setitem__($self, key, value, /)\n--\n\nSet self[key] to value."),
     SQSLOT("__delitem__", sq_ass_item, slot_sq_ass_item, wrap_sq_delitem,
-           "x.__delitem__(y) <==> del x[y]"),
+           "__delitem__($self, key, /)\n--\n\nDelete self[key]."),
     SQSLOT("__contains__", sq_contains, slot_sq_contains, wrap_objobjproc,
-           "x.__contains__(y) <==> y in x"),
+           "__contains__($self, key, /)\n--\n\nReturn key in self."),
     SQSLOT("__iadd__", sq_inplace_concat, NULL,
-      wrap_binaryfunc, "x.__iadd__(y) <==> x+=y"),
+           wrap_binaryfunc,
+           "__iadd__($self, value, /)\n--\n\nImplement self+=value."),
     SQSLOT("__imul__", sq_inplace_repeat, NULL,
-      wrap_indexargfunc, "x.__imul__(y) <==> x*=y"),
+           wrap_indexargfunc,
+           "__imul__($self, value, /)\n--\n\nImplement self*=value."),
 
     {NULL}
 };
@@ -6184,15 +6771,14 @@ recurse_down_subclasses(PyTypeObject *type, PyObject *name,
 {
     PyTypeObject *subclass;
     PyObject *ref, *subclasses, *dict;
-    Py_ssize_t i, n;
+    Py_ssize_t i;
 
     subclasses = type->tp_subclasses;
     if (subclasses == NULL)
         return 0;
-    assert(PyList_Check(subclasses));
-    n = PyList_GET_SIZE(subclasses);
-    for (i = 0; i < n; i++) {
-        ref = PyList_GET_ITEM(subclasses, i);
+    assert(PyDict_CheckExact(subclasses));
+    i = 0;
+    while (PyDict_Next(subclasses, &i, NULL, &ref)) {
         assert(PyWeakref_CheckRef(ref));
         subclass = (PyTypeObject *)PyWeakref_GET_OBJECT(ref);
         assert(subclass != NULL);
@@ -6332,70 +6918,74 @@ static PyObject *
 super_getattro(PyObject *self, PyObject *name)
 {
     superobject *su = (superobject *)self;
-    int skip = su->obj_type == NULL;
+    PyTypeObject *starttype;
+    PyObject *mro;
+    Py_ssize_t i, n;
 
-    if (!skip) {
-        /* We want __class__ to return the class of the super object
-           (i.e. super, or a subclass), not the class of su->obj. */
-        skip = (PyUnicode_Check(name) &&
-            PyUnicode_GET_LENGTH(name) == 9 &&
-            PyUnicode_CompareWithASCIIString(name, "__class__") == 0);
+    starttype = su->obj_type;
+    if (starttype == NULL)
+        goto skip;
+
+    /* We want __class__ to return the class of the super object
+       (i.e. super, or a subclass), not the class of su->obj. */
+    if (PyUnicode_Check(name) &&
+        PyUnicode_GET_LENGTH(name) == 9 &&
+        _PyUnicode_CompareWithId(name, &PyId___class__) == 0)
+        goto skip;
+
+    mro = starttype->tp_mro;
+    if (mro == NULL)
+        goto skip;
+
+    assert(PyTuple_Check(mro));
+    n = PyTuple_GET_SIZE(mro);
+
+    /* No need to check the last one: it's gonna be skipped anyway.  */
+    for (i = 0; i+1 < n; i++) {
+        if ((PyObject *)(su->type) == PyTuple_GET_ITEM(mro, i))
+            break;
     }
+    i++;  /* skip su->type (if any)  */
+    if (i >= n)
+        goto skip;
 
-    if (!skip) {
-        PyObject *mro, *res, *tmp, *dict;
-        PyTypeObject *starttype;
+    /* keep a strong reference to mro because starttype->tp_mro can be
+       replaced during PyDict_GetItem(dict, name)  */
+    Py_INCREF(mro);
+    do {
+        PyObject *res, *tmp, *dict;
         descrgetfunc f;
-        Py_ssize_t i, n;
 
-        starttype = su->obj_type;
-        mro = starttype->tp_mro;
+        tmp = PyTuple_GET_ITEM(mro, i);
+        assert(PyType_Check(tmp));
 
-        if (mro == NULL)
-            n = 0;
-        else {
-            assert(PyTuple_Check(mro));
-            n = PyTuple_GET_SIZE(mro);
-        }
-        for (i = 0; i < n; i++) {
-            if ((PyObject *)(su->type) == PyTuple_GET_ITEM(mro, i))
-                break;
-        }
-        i++;
-        res = NULL;
-        /* keep a strong reference to mro because starttype->tp_mro can be
-           replaced during PyDict_GetItem(dict, name)  */
-        Py_INCREF(mro);
-        for (; i < n; i++) {
-            tmp = PyTuple_GET_ITEM(mro, i);
-            if (PyType_Check(tmp))
-                dict = ((PyTypeObject *)tmp)->tp_dict;
-            else
-                continue;
-            res = PyDict_GetItem(dict, name);
-            if (res != NULL) {
-                Py_INCREF(res);
-                f = Py_TYPE(res)->tp_descr_get;
-                if (f != NULL) {
-                    tmp = f(res,
-                        /* Only pass 'obj' param if
-                           this is instance-mode super
-                           (See SF ID #743627)
-                        */
-                        (su->obj == (PyObject *)
-                                    su->obj_type
-                            ? (PyObject *)NULL
-                            : su->obj),
-                        (PyObject *)starttype);
-                    Py_DECREF(res);
-                    res = tmp;
-                }
-                Py_DECREF(mro);
-                return res;
+        dict = ((PyTypeObject *)tmp)->tp_dict;
+        assert(dict != NULL && PyDict_Check(dict));
+
+        res = PyDict_GetItem(dict, name);
+        if (res != NULL) {
+            Py_INCREF(res);
+
+            f = Py_TYPE(res)->tp_descr_get;
+            if (f != NULL) {
+                tmp = f(res,
+                    /* Only pass 'obj' param if this is instance-mode super
+                       (See SF ID #743627)  */
+                    (su->obj == (PyObject *)starttype) ? NULL : su->obj,
+                    (PyObject *)starttype);
+                Py_DECREF(res);
+                res = tmp;
             }
+
+            Py_DECREF(mro);
+            return res;
         }
-        Py_DECREF(mro);
-    }
+
+        i++;
+    } while (i < n);
+    Py_DECREF(mro);
+
+  skip:
     return PyObject_GenericGetAttr(self, name);
 }
 
@@ -6505,9 +7095,16 @@ super_init(PyObject *self, PyObject *args, PyObject *kwds)
     if (type == NULL) {
         /* Call super(), without args -- fill in from __class__
            and first local variable on the stack. */
-        PyFrameObject *f = PyThreadState_GET()->frame;
-        PyCodeObject *co = f->f_code;
+        PyFrameObject *f;
+        PyCodeObject *co;
         Py_ssize_t i, n;
+        f = PyThreadState_GET()->frame;
+        if (f == NULL) {
+            PyErr_SetString(PyExc_RuntimeError,
+                            "super(): no current frame");
+            return -1;
+        }
+        co = f->f_code;
         if (co == NULL) {
             PyErr_SetString(PyExc_RuntimeError,
                             "super(): no code object");
@@ -6545,8 +7142,7 @@ super_init(PyObject *self, PyObject *args, PyObject *kwds)
         for (i = 0; i < n; i++) {
             PyObject *name = PyTuple_GET_ITEM(co->co_freevars, i);
             assert(PyUnicode_Check(name));
-            if (!PyUnicode_CompareWithASCIIString(name,
-                                                  "__class__")) {
+            if (!_PyUnicode_CompareWithId(name, &PyId___class__)) {
                 Py_ssize_t index = co->co_nlocals +
                     PyTuple_GET_SIZE(co->co_cellvars) + i;
                 PyObject *cell = f->f_localsplus[index];
diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c
index a572c12..ea540d6 100644
--- a/Objects/unicodectype.c
+++ b/Objects/unicodectype.c
@@ -61,7 +61,7 @@ gettyperecord(Py_UCS4 code)
 /* Returns the titlecase Unicode characters corresponding to ch or just
    ch if no titlecase mapping is known. */
 
-Py_UCS4 _PyUnicode_ToTitlecase(register Py_UCS4 ch)
+Py_UCS4 _PyUnicode_ToTitlecase(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 2bc34c5..193d898 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -47,13 +47,10 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 #include <windows.h>
 #endif
 
-/* Endianness switches; defaults to little endian */
-
-#ifdef WORDS_BIGENDIAN
-# define BYTEORDER_IS_BIG_ENDIAN
-#else
-# define BYTEORDER_IS_LITTLE_ENDIAN
-#endif
+/*[clinic input]
+class str "PyUnicodeObject *" "&PyUnicode_Type"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=604e916854800fa8]*/
 
 /* --- Globals ------------------------------------------------------------
 
@@ -130,16 +127,14 @@ extern "C" {
 /* true if the Unicode object has an allocated UTF-8 memory block
    (not shared with other data) */
 #define _PyUnicode_HAS_UTF8_MEMORY(op)                  \
-    (assert(_PyUnicode_CHECK(op)),                      \
-     (!PyUnicode_IS_COMPACT_ASCII(op)                   \
+    ((!PyUnicode_IS_COMPACT_ASCII(op)                   \
       && _PyUnicode_UTF8(op)                            \
       && _PyUnicode_UTF8(op) != PyUnicode_DATA(op)))
 
 /* true if the Unicode object has an allocated wstr memory block
    (not shared with other data) */
 #define _PyUnicode_HAS_WSTR_MEMORY(op)                  \
-    (assert(_PyUnicode_CHECK(op)),                      \
-     (_PyUnicode_WSTR(op) &&                            \
+    ((_PyUnicode_WSTR(op) &&                            \
       (!PyUnicode_IS_READY(op) ||                       \
        _PyUnicode_WSTR(op) != PyUnicode_DATA(op))))
 
@@ -150,9 +145,9 @@ extern "C" {
    buffer where the result characters are written to. */
 #define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \
     do {                                                \
-        to_type *_to = (to_type *) to;                  \
-        const from_type *_iter = (begin);               \
-        const from_type *_end = (end);                  \
+        to_type *_to = (to_type *)(to);                \
+        const from_type *_iter = (from_type *)(begin);  \
+        const from_type *_end = (from_type *)(end);     \
         Py_ssize_t n = (_end) - (_iter);                \
         const from_type *_unrolled_end =                \
             _iter + _Py_SIZE_ROUND_DOWN(n, 4);          \
@@ -199,6 +194,10 @@ static PyObject *unicode_empty = NULL;
         return unicode_empty;                           \
     } while (0)
 
+/* Forward declaration */
+Py_LOCAL_INLINE(int)
+_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
+
 /* List of static strings. */
 static _Py_Identifier *static_strings = NULL;
 
@@ -427,8 +426,6 @@ unicode_result_wchar(PyObject *unicode)
 #ifndef Py_DEBUG
     Py_ssize_t len;
 
-    assert(Py_REFCNT(unicode) == 1);
-
     len = _PyUnicode_WSTR_LENGTH(unicode);
     if (len == 0) {
         Py_DECREF(unicode);
@@ -445,10 +442,12 @@ unicode_result_wchar(PyObject *unicode)
     }
 
     if (_PyUnicode_Ready(unicode) < 0) {
-        Py_XDECREF(unicode);
+        Py_DECREF(unicode);
         return NULL;
     }
 #else
+    assert(Py_REFCNT(unicode) == 1);
+
     /* don't make the result ready in debug mode to ensure that the caller
        makes the string ready before using it */
     assert(_PyUnicode_CheckConsistency(unicode, 1));
@@ -471,7 +470,9 @@ unicode_result_ready(PyObject *unicode)
     }
 
     if (length == 1) {
-        Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
+        void *data = PyUnicode_DATA(unicode);
+        int kind = PyUnicode_KIND(unicode);
+        Py_UCS4 ch = PyUnicode_READ(kind, data, 0);
         if (ch < 256) {
             PyObject *latin1_char = unicode_latin1[ch];
             if (latin1_char != NULL) {
@@ -544,7 +545,6 @@ static OSVERSIONINFOEX winver;
 
 static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
 
-#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
 #define BLOOM(mask, ch)     ((mask &  (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
 
 #define BLOOM_LINEBREAK(ch)                                             \
@@ -554,21 +554,40 @@ static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
 Py_LOCAL_INLINE(BLOOM_MASK)
 make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
 {
+#define BLOOM_UPDATE(TYPE, MASK, PTR, LEN)             \
+    do {                                               \
+        TYPE *data = (TYPE *)PTR;                      \
+        TYPE *end = data + LEN;                        \
+        Py_UCS4 ch;                                    \
+        for (; data != end; data++) {                  \
+            ch = *data;                                \
+            MASK |= (1UL << (ch & (BLOOM_WIDTH - 1))); \
+        }                                              \
+        break;                                         \
+    } while (0)
+
     /* calculate simple bloom-style bitmask for a given unicode string */
 
     BLOOM_MASK mask;
-    Py_ssize_t i;
 
     mask = 0;
-    for (i = 0; i < len; i++)
-        BLOOM_ADD(mask, PyUnicode_READ(kind, ptr, i));
-
+    switch (kind) {
+    case PyUnicode_1BYTE_KIND:
+        BLOOM_UPDATE(Py_UCS1, mask, ptr, len);
+        break;
+    case PyUnicode_2BYTE_KIND:
+        BLOOM_UPDATE(Py_UCS2, mask, ptr, len);
+        break;
+    case PyUnicode_4BYTE_KIND:
+        BLOOM_UPDATE(Py_UCS4, mask, ptr, len);
+        break;
+    default:
+        assert(0);
+    }
     return mask;
-}
 
-#define BLOOM_MEMBER(mask, chr, str) \
-    (BLOOM(mask, chr) \
-     && (PyUnicode_FindChar(str, chr, 0, PyUnicode_GET_LENGTH(str), 1) >= 0))
+#undef BLOOM_UPDATE
+}
 
 /* Compilation of templated routines */
 
@@ -588,6 +607,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
 #include "stringlib/split.h"
 #include "stringlib/count.h"
 #include "stringlib/find.h"
+#include "stringlib/replace.h"
 #include "stringlib/find_max_char.h"
 #include "stringlib/localeutil.h"
 #include "stringlib/undef.h"
@@ -598,6 +618,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
 #include "stringlib/split.h"
 #include "stringlib/count.h"
 #include "stringlib/find.h"
+#include "stringlib/replace.h"
 #include "stringlib/find_max_char.h"
 #include "stringlib/localeutil.h"
 #include "stringlib/undef.h"
@@ -608,6 +629,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
 #include "stringlib/split.h"
 #include "stringlib/count.h"
 #include "stringlib/find.h"
+#include "stringlib/replace.h"
 #include "stringlib/find_max_char.h"
 #include "stringlib/localeutil.h"
 #include "stringlib/undef.h"
@@ -654,6 +676,25 @@ Py_LOCAL_INLINE(Py_ssize_t) findchar(void *s, int kind,
     }
 }
 
+#ifdef Py_DEBUG
+/* Fill the data of a Unicode string with invalid characters to detect bugs
+   earlier.
+
+   _PyUnicode_CheckConsistency(str, 1) detects invalid characters, at least for
+   ASCII and UCS-4 strings. U+00FF is invalid in ASCII and U+FFFFFFFF is an
+   invalid character in Unicode 6.0. */
+static void
+unicode_fill_invalid(PyObject *unicode, Py_ssize_t old_length)
+{
+    int kind = PyUnicode_KIND(unicode);
+    Py_UCS1 *data = PyUnicode_1BYTE_DATA(unicode);
+    Py_ssize_t length = _PyUnicode_LENGTH(unicode);
+    if (length <= old_length)
+        return;
+    memset(data + old_length * kind, 0xff, (length - old_length) * kind);
+}
+#endif
+
 static PyObject*
 resize_compact(PyObject *unicode, Py_ssize_t length)
 {
@@ -662,6 +703,10 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
     Py_ssize_t new_size;
     int share_wstr;
     PyObject *new_unicode;
+#ifdef Py_DEBUG
+    Py_ssize_t old_length = _PyUnicode_LENGTH(unicode);
+#endif
+
     assert(unicode_modifiable(unicode));
     assert(PyUnicode_IS_READY(unicode));
     assert(PyUnicode_IS_COMPACT(unicode));
@@ -679,6 +724,11 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
     }
     new_size = (struct_size + (length + 1) * char_size);
 
+    if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
+        PyObject_DEL(_PyUnicode_UTF8(unicode));
+        _PyUnicode_UTF8(unicode) = NULL;
+        _PyUnicode_UTF8_LENGTH(unicode) = 0;
+    }
     _Py_DEC_REFTOTAL;
     _Py_ForgetReference(unicode);
 
@@ -701,6 +751,9 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
         PyObject_DEL(_PyUnicode_WSTR(unicode));
         _PyUnicode_WSTR(unicode) = NULL;
     }
+#ifdef Py_DEBUG
+    unicode_fill_invalid(unicode, old_length);
+#endif
     PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode),
                     length, 0);
     assert(_PyUnicode_CheckConsistency(unicode, 0));
@@ -719,6 +772,9 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
         Py_ssize_t char_size;
         int share_wstr, share_utf8;
         void *data;
+#ifdef Py_DEBUG
+        Py_ssize_t old_length = _PyUnicode_LENGTH(unicode);
+#endif
 
         data = _PyUnicode_DATA_ANY(unicode);
         char_size = PyUnicode_KIND(unicode);
@@ -754,6 +810,9 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
         }
         _PyUnicode_LENGTH(unicode) = length;
         PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0);
+#ifdef Py_DEBUG
+        unicode_fill_invalid(unicode, old_length);
+#endif
         if (share_wstr || _PyUnicode_WSTR(unicode) == NULL) {
             assert(_PyUnicode_CheckConsistency(unicode, 0));
             return 0;
@@ -806,8 +865,8 @@ resize_copy(PyObject *unicode, Py_ssize_t length)
             return NULL;
         copy_length = _PyUnicode_WSTR_LENGTH(unicode);
         copy_length = Py_MIN(copy_length, length);
-        Py_UNICODE_COPY(_PyUnicode_WSTR(w), _PyUnicode_WSTR(unicode),
-                        copy_length);
+        Py_MEMCPY(_PyUnicode_WSTR(w), _PyUnicode_WSTR(unicode),
+                  copy_length * sizeof(wchar_t));
         return w;
     }
 }
@@ -824,7 +883,7 @@ resize_copy(PyObject *unicode, Py_ssize_t length)
 static PyUnicodeObject *
 _PyUnicode_New(Py_ssize_t length)
 {
-    register PyUnicodeObject *unicode;
+    PyUnicodeObject *unicode;
     size_t new_size;
 
     /* Optimization for empty strings */
@@ -847,6 +906,19 @@ _PyUnicode_New(Py_ssize_t length)
     if (unicode == NULL)
         return NULL;
     new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
+
+    _PyUnicode_WSTR_LENGTH(unicode) = length;
+    _PyUnicode_HASH(unicode) = -1;
+    _PyUnicode_STATE(unicode).interned = 0;
+    _PyUnicode_STATE(unicode).kind = 0;
+    _PyUnicode_STATE(unicode).compact = 0;
+    _PyUnicode_STATE(unicode).ready = 0;
+    _PyUnicode_STATE(unicode).ascii = 0;
+    _PyUnicode_DATA_ANY(unicode) = NULL;
+    _PyUnicode_LENGTH(unicode) = 0;
+    _PyUnicode_UTF8(unicode) = NULL;
+    _PyUnicode_UTF8_LENGTH(unicode) = 0;
+
     _PyUnicode_WSTR(unicode) = (Py_UNICODE*) PyObject_MALLOC(new_size);
     if (!_PyUnicode_WSTR(unicode)) {
         Py_DECREF(unicode);
@@ -863,17 +935,7 @@ _PyUnicode_New(Py_ssize_t length)
      */
     _PyUnicode_WSTR(unicode)[0] = 0;
     _PyUnicode_WSTR(unicode)[length] = 0;
-    _PyUnicode_WSTR_LENGTH(unicode) = length;
-    _PyUnicode_HASH(unicode) = -1;
-    _PyUnicode_STATE(unicode).interned = 0;
-    _PyUnicode_STATE(unicode).kind = 0;
-    _PyUnicode_STATE(unicode).compact = 0;
-    _PyUnicode_STATE(unicode).ready = 0;
-    _PyUnicode_STATE(unicode).ascii = 0;
-    _PyUnicode_DATA_ANY(unicode) = NULL;
-    _PyUnicode_LENGTH(unicode) = 0;
-    _PyUnicode_UTF8(unicode) = NULL;
-    _PyUnicode_UTF8_LENGTH(unicode) = 0;
+
     assert(_PyUnicode_CheckConsistency((PyObject *)unicode, 0));
     return unicode;
 }
@@ -954,17 +1016,19 @@ _PyUnicode_Dump(PyObject *op)
     }
     else
         data = unicode->data.any;
-    printf("%s: len=%zu, ",unicode_kind_name(op), ascii->length);
+    printf("%s: len=%" PY_FORMAT_SIZE_T "u, ",
+           unicode_kind_name(op), ascii->length);
 
     if (ascii->wstr == data)
         printf("shared ");
     printf("wstr=%p", ascii->wstr);
 
     if (!(ascii->state.ascii == 1 && ascii->state.compact == 1)) {
-        printf(" (%zu), ", compact->wstr_length);
+        printf(" (%" PY_FORMAT_SIZE_T "u), ", compact->wstr_length);
         if (!ascii->state.compact && compact->utf8 == unicode->data.any)
             printf("shared ");
-        printf("utf8=%p (%zu)", compact->utf8, compact->utf8_length);
+        printf("utf8=%p (%" PY_FORMAT_SIZE_T "u)",
+               compact->utf8, compact->utf8_length);
     }
     printf(", data=%p\n", data);
 }
@@ -1078,11 +1142,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
         }
     }
 #ifdef Py_DEBUG
-    /* Fill the data with invalid characters to detect bugs earlier.
-       _PyUnicode_CheckConsistency(str, 1) detects invalid characters,
-       at least for ASCII and UCS-4 strings. U+00FF is invalid in ASCII
-       and U+FFFFFFFF is an invalid character in Unicode 6.0. */
-    memset(data, 0xff, size * kind);
+    unicode_fill_invalid((PyObject*)unicode, 0);
 #endif
     assert(_PyUnicode_CheckConsistency((PyObject*)unicode, 0));
     return obj;
@@ -1480,6 +1540,10 @@ _PyUnicode_Ready(PyObject *unicode)
         /* in case the native representation is 2-bytes, we need to allocate a
            new normalized 4-byte version. */
         length_wo_surrogates = _PyUnicode_WSTR_LENGTH(unicode) - num_surrogates;
+        if (length_wo_surrogates > PY_SSIZE_T_MAX / 4 - 1) {
+            PyErr_NoMemory();
+            return -1;
+        }
         _PyUnicode_DATA_ANY(unicode) = PyObject_MALLOC(4 * (length_wo_surrogates + 1));
         if (!_PyUnicode_DATA_ANY(unicode)) {
             PyErr_NoMemory();
@@ -1512,7 +1576,7 @@ _PyUnicode_Ready(PyObject *unicode)
 }
 
 static void
-unicode_dealloc(register PyObject *unicode)
+unicode_dealloc(PyObject *unicode)
 {
     switch (PyUnicode_CHECK_INTERNED(unicode)) {
     case SSTATE_NOT_INTERNED:
@@ -1644,39 +1708,7 @@ PyUnicode_Resize(PyObject **p_unicode, Py_ssize_t length)
     return unicode_resize(p_unicode, length);
 }
 
-static int
-unicode_widen(PyObject **p_unicode, Py_ssize_t length,
-              unsigned int maxchar)
-{
-    PyObject *result;
-    assert(PyUnicode_IS_READY(*p_unicode));
-    assert(length <= PyUnicode_GET_LENGTH(*p_unicode));
-    if (maxchar <= PyUnicode_MAX_CHAR_VALUE(*p_unicode))
-        return 0;
-    result = PyUnicode_New(PyUnicode_GET_LENGTH(*p_unicode),
-                           maxchar);
-    if (result == NULL)
-        return -1;
-    _PyUnicode_FastCopyCharacters(result, 0, *p_unicode, 0, length);
-    Py_DECREF(*p_unicode);
-    *p_unicode = result;
-    return 0;
-}
-
-static int
-unicode_putchar(PyObject **p_unicode, Py_ssize_t *pos,
-                Py_UCS4 ch)
-{
-    assert(ch <= MAX_UNICODE);
-    if (unicode_widen(p_unicode, *pos, ch) < 0)
-        return -1;
-    PyUnicode_WRITE(PyUnicode_KIND(*p_unicode),
-                    PyUnicode_DATA(*p_unicode),
-                    (*pos)++, ch);
-    return 0;
-}
-
-/* Copy a ASCII or latin1 char* string into a Python Unicode string.
+/* Copy an ASCII or latin1 char* string into a Python Unicode string.
 
    WARNING: The function doesn't copy the terminating null character and
    doesn't check the maximum character (may write a latin1 character in an
@@ -1692,6 +1724,14 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
     switch (kind) {
     case PyUnicode_1BYTE_KIND: {
         assert(index + len <= PyUnicode_GET_LENGTH(unicode));
+#ifdef Py_DEBUG
+        if (PyUnicode_IS_ASCII(unicode)) {
+            Py_UCS4 maxchar = ucs1lib_find_max_char(
+                (const Py_UCS1*)str,
+                (const Py_UCS1*)str + len);
+            assert(maxchar < 128);
+        }
+#endif
         memcpy((char *) data + index, str, len);
         break;
     }
@@ -1720,7 +1760,6 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
     }
 }
 
-
 static PyObject*
 get_latin1_char(unsigned char ch)
 {
@@ -1737,6 +1776,34 @@ get_latin1_char(unsigned char ch)
     return unicode;
 }
 
+static PyObject*
+unicode_char(Py_UCS4 ch)
+{
+    /* Return the one-character string for code point ch (NULL if
+       PyUnicode_New() fails); ch < 256 is left to get_latin1_char(). */
+    PyObject *result;
+    int kind;
+
+    assert(ch <= MAX_UNICODE);
+    if (ch < 256)
+        return get_latin1_char(ch);
+    result = PyUnicode_New(1, ch);
+    if (result == NULL)
+        return NULL;
+    /* Store ch through the accessor matching the allocated kind. */
+    kind = PyUnicode_KIND(result);
+    if (kind == PyUnicode_1BYTE_KIND)
+        PyUnicode_1BYTE_DATA(result)[0] = (Py_UCS1)ch;
+    else if (kind == PyUnicode_2BYTE_KIND)
+        PyUnicode_2BYTE_DATA(result)[0] = (Py_UCS2)ch;
+    else {
+        assert(kind == PyUnicode_4BYTE_KIND);
+        PyUnicode_4BYTE_DATA(result)[0] = ch;
+    }
+    assert(_PyUnicode_CheckConsistency(result, 1));
+    return result;
+}
+
 PyObject *
 PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)
 {
@@ -1934,18 +2001,8 @@ _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size)
     if (size == 0)
         _Py_RETURN_UNICODE_EMPTY();
     assert(size > 0);
-    if (size == 1) {
-        Py_UCS4 ch = u[0];
-        if (ch < 256)
-            return get_latin1_char((unsigned char)ch);
-
-        res = PyUnicode_New(1, ch);
-        if (res == NULL)
-            return NULL;
-        PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
-        assert(_PyUnicode_CheckConsistency(res, 1));
-        return res;
-    }
+    if (size == 1)
+        return unicode_char(u[0]);
 
     max_char = ucs2lib_find_max_char(u, u + size);
     res = PyUnicode_New(size, max_char);
@@ -1970,18 +2027,8 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
     if (size == 0)
         _Py_RETURN_UNICODE_EMPTY();
     assert(size > 0);
-    if (size == 1) {
-        Py_UCS4 ch = u[0];
-        if (ch < 256)
-            return get_latin1_char((unsigned char)ch);
-
-        res = PyUnicode_New(1, ch);
-        if (res == NULL)
-            return NULL;
-        PyUnicode_WRITE(PyUnicode_KIND(res), PyUnicode_DATA(res), 0, ch);
-        assert(_PyUnicode_CheckConsistency(res, 1));
-        return res;
-    }
+    if (size == 1)
+        return unicode_char(u[0]);
 
     max_char = ucs4lib_find_max_char(u, u + size);
     res = PyUnicode_New(size, max_char);
@@ -2148,7 +2195,7 @@ _PyUnicode_AsKind(PyObject *s, unsigned int kind)
     }
     switch (kind) {
     case PyUnicode_2BYTE_KIND:
-        result = PyMem_Malloc(len * sizeof(Py_UCS2));
+        result = PyMem_New(Py_UCS2, len);
         if (!result)
             return PyErr_NoMemory();
         assert(skind == PyUnicode_1BYTE_KIND);
@@ -2159,7 +2206,7 @@ _PyUnicode_AsKind(PyObject *s, unsigned int kind)
             result);
         return result;
     case PyUnicode_4BYTE_KIND:
-        result = PyMem_Malloc(len * sizeof(Py_UCS4));
+        result = PyMem_New(Py_UCS4, len);
         if (!result)
             return PyErr_NoMemory();
         if (skind == PyUnicode_2BYTE_KIND) {
@@ -2201,11 +2248,7 @@ as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
     if (copy_null)
         targetlen++;
     if (!target) {
-        if (PY_SSIZE_T_MAX / sizeof(Py_UCS4) < targetlen) {
-            PyErr_NoMemory();
-            return NULL;
-        }
-        target = PyMem_Malloc(targetlen * sizeof(Py_UCS4));
+        target = PyMem_New(Py_UCS4, targetlen);
         if (!target) {
             PyErr_NoMemory();
             return NULL;
@@ -2257,7 +2300,7 @@ PyUnicode_AsUCS4Copy(PyObject *string)
 #ifdef HAVE_WCHAR_H
 
 PyObject *
-PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size)
+PyUnicode_FromWideChar(const wchar_t *w, Py_ssize_t size)
 {
     if (w == NULL) {
         if (size == 0)
@@ -2277,16 +2320,9 @@ PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size)
 
 static void
 makefmt(char *fmt, int longflag, int longlongflag, int size_tflag,
-        int zeropad, int width, int precision, char c)
+        char c)
 {
     *fmt++ = '%';
-    if (width) {
-        if (zeropad)
-            *fmt++ = '0';
-        fmt += sprintf(fmt, "%d", width);
-    }
-    if (precision)
-        fmt += sprintf(fmt, ".%d", precision);
     if (longflag)
         *fmt++ = 'l';
     else if (longlongflag) {
@@ -2311,46 +2347,139 @@ makefmt(char *fmt, int longflag, int longlongflag, int size_tflag,
     *fmt = '\0';
 }
 
-/* helper for PyUnicode_FromFormatV() */
+/* maximum number of characters required for output of %lld or %p.
+   We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits,
+   plus 1 for the sign.  53/22 is an upper bound for log10(256). */
+#define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22)
+
+static int
+unicode_fromformat_write_str(_PyUnicodeWriter *writer, PyObject *str,
+                             Py_ssize_t width, Py_ssize_t precision)
+{
+    /* Write str into writer, keeping at most `precision` characters (-1:
+       no limit) and padding with leading spaces up to `width` characters.
+       Returns -1 as soon as a step fails, 0 once everything is written. */
+    Py_ssize_t nchars, padding, total;
+    Py_UCS4 maxchar;
+
+    if (PyUnicode_READY(str) == -1)
+        return -1;
+    nchars = PyUnicode_GET_LENGTH(str);
+
+    /* Fast path: nothing to cut and nothing to pad. */
+    if (width <= nchars && (precision == -1 || precision >= nchars))
+        return _PyUnicodeWriter_WriteStr(writer, str);
+
+    if (precision != -1 && precision < nchars)
+        nchars = precision;
+    total = (width > nchars) ? width : nchars;
+    /* Scan the kept prefix only if str's max char exceeds the writer's. */
+    maxchar = writer->maxchar;
+    if (PyUnicode_MAX_CHAR_VALUE(str) > maxchar)
+        maxchar = _PyUnicode_FindMaxChar(str, 0, nchars);
+    if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
+        return -1;
+
+    padding = width - nchars;
+    if (padding > 0) {
+        if (PyUnicode_Fill(writer->buffer, writer->pos, padding, ' ') == -1)
+            return -1;
+        writer->pos += padding;
+    }
+    _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+                                  str, 0, nchars);
+    writer->pos += nchars;
+    return 0;
+}
+
+static int
+unicode_fromformat_write_cstr(_PyUnicodeWriter *writer, const char *str,
+                              Py_ssize_t width, Py_ssize_t precision)
+{
+    /* Decode up to `precision` bytes of UTF-8 str and write it padded. */
+    Py_ssize_t length = 0;
+    PyObject *unicode;
+    int res;
+    if (precision == -1)
+        length = strlen(str);
+    else  /* like C's "%.Ns": never read str beyond `precision` bytes */
+        while (length < precision && str[length])
+            length++;
+    unicode = PyUnicode_DecodeUTF8Stateful(str, length, "replace", NULL);
+    if (unicode == NULL)
+        return -1;
+    res = unicode_fromformat_write_str(writer, unicode, width, -1);
+    Py_DECREF(unicode);
+    return res;
+}
 
 static const char*
-parse_format_flags(const char *f,
-                   int *p_width, int *p_precision,
-                   int *p_longflag, int *p_longlongflag, int *p_size_tflag)
+unicode_fromformat_arg(_PyUnicodeWriter *writer,
+                       const char *f, va_list *vargs)
 {
-    int width, precision, longflag, longlongflag, size_tflag;
+    const char *p;
+    Py_ssize_t len;
+    int zeropad;
+    Py_ssize_t width;
+    Py_ssize_t precision;
+    int longflag;
+    int longlongflag;
+    int size_tflag;
+    Py_ssize_t fill;
 
-    /* parse the width.precision part, e.g. "%2.5s" => width=2, precision=5 */
+    p = f;
     f++;
-    width = 0;
-    while (Py_ISDIGIT((unsigned)*f))
-        width = (width*10) + *f++ - '0';
-    precision = 0;
+    zeropad = 0;
+    if (*f == '0') {
+        zeropad = 1;
+        f++;
+    }
+
+    /* parse the width.precision part, e.g. "%2.5s" => width=2, precision=5 */
+    width = -1;
+    if (Py_ISDIGIT((unsigned)*f)) {
+        width = *f - '0';
+        f++;
+        while (Py_ISDIGIT((unsigned)*f)) {
+            if (width > (PY_SSIZE_T_MAX - ((int)*f - '0')) / 10) {
+                PyErr_SetString(PyExc_ValueError,
+                                "width too big");
+                return NULL;
+            }
+            width = (width * 10) + (*f - '0');
+            f++;
+        }
+    }
+    precision = -1;
     if (*f == '.') {
         f++;
-        while (Py_ISDIGIT((unsigned)*f))
-            precision = (precision*10) + *f++ - '0';
+        if (Py_ISDIGIT((unsigned)*f)) {
+            precision = (*f - '0');
+            f++;
+            while (Py_ISDIGIT((unsigned)*f)) {
+                if (precision > (PY_SSIZE_T_MAX - ((int)*f - '0')) / 10) {
+                    PyErr_SetString(PyExc_ValueError,
+                                    "precision too big");
+                    return NULL;
+                }
+                precision = (precision * 10) + (*f - '0');
+                f++;
+            }
+        }
         if (*f == '%') {
             /* "%.3%s" => f points to "3" */
             f--;
         }
     }
-    if (width < precision)
-        width = precision;
     if (*f == '\0') {
-        /* bogus format "%.1" => go backward, f points to "1" */
+        /* bogus format "%.123" => go backward, f points to "3" */
         f--;
     }
-    if (p_width != NULL)
-        *p_width = width;
-    if (p_precision != NULL)
-        *p_precision = precision;
 
     /* Handle %ld, %lu, %lld and %llu. */
     longflag = 0;
     longlongflag = 0;
     size_tflag = 0;
-
     if (*f == 'l') {
         if (f[1] == 'd' || f[1] == 'u' || f[1] == 'i') {
             longflag = 1;
@@ -2369,499 +2498,284 @@ parse_format_flags(const char *f,
         size_tflag = 1;
         ++f;
     }
-    if (p_longflag != NULL)
-        *p_longflag = longflag;
-    if (p_longlongflag != NULL)
-        *p_longlongflag = longlongflag;
-    if (p_size_tflag != NULL)
-        *p_size_tflag = size_tflag;
-    return f;
-}
 
-/* maximum number of characters required for output of %ld.  21 characters
-   allows for 64-bit integers (in decimal) and an optional sign. */
-#define MAX_LONG_CHARS 21
-/* maximum number of characters required for output of %lld.
-   We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits,
-   plus 1 for the sign.  53/22 is an upper bound for log10(256). */
-#define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22)
+    if (f[1] == '\0')
+        writer->overallocate = 0;
 
-PyObject *
-PyUnicode_FromFormatV(const char *format, va_list vargs)
-{
-    va_list count;
-    Py_ssize_t callcount = 0;
-    PyObject **callresults = NULL;
-    PyObject **callresult = NULL;
-    Py_ssize_t n = 0;
-    int width = 0;
-    int precision = 0;
-    int zeropad;
-    const char* f;
-    PyObject *string;
-    /* used by sprintf */
-    char fmt[61]; /* should be enough for %0width.precisionlld */
-    Py_UCS4 maxchar = 127; /* result is ASCII by default */
-    Py_UCS4 argmaxchar;
-    Py_ssize_t numbersize = 0;
-    char *numberresults = NULL;
-    char *numberresult = NULL;
-    Py_ssize_t i;
-    int kind;
-    void *data;
+    switch (*f) {
+    case 'c':
+    {
+        int ordinal = va_arg(*vargs, int);
+        if (ordinal < 0 || ordinal > MAX_UNICODE) {
+            PyErr_SetString(PyExc_OverflowError,
+                            "character argument not in range(0x110000)");
+            return NULL;
+        }
+        if (_PyUnicodeWriter_WriteCharInline(writer, ordinal) < 0)
+            return NULL;
+        break;
+    }
 
-    Py_VA_COPY(count, vargs);
-    /* step 1: count the number of %S/%R/%A/%s format specifications
-     * (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII()/
-     * PyUnicode_DecodeUTF8() for these objects once during step 3 and put the
-     * result in an array)
-     * also estimate a upper bound for all the number formats in the string,
-     * numbers will be formatted in step 3 and be kept in a '\0'-separated
-     * buffer before putting everything together. */
-    for (f = format; *f; f++) {
-        if (*f == '%') {
-            int longlongflag;
-            /* skip width or width.precision (eg. "1.2" of "%1.2f") */
-            f = parse_format_flags(f, &width, NULL, NULL, &longlongflag, NULL);
-            if (*f == 's' || *f=='S' || *f=='R' || *f=='A' || *f=='V')
-                ++callcount;
+    case 'i':
+    case 'd':
+    case 'u':
+    case 'x':
+    {
+        /* used by sprintf */
+        char fmt[10]; /* should be enough for "%0lld\0" */
+        char buffer[MAX_LONG_LONG_CHARS];
+        Py_ssize_t arglen;
+
+        if (*f == 'u') {
+            makefmt(fmt, longflag, longlongflag, size_tflag, *f);
 
-            else if (*f == 'd' || *f=='u' || *f=='i' || *f=='x' || *f=='p') {
+            if (longflag)
+                len = sprintf(buffer, fmt,
+                        va_arg(*vargs, unsigned long));
 #ifdef HAVE_LONG_LONG
-                if (longlongflag) {
-                    if (width < MAX_LONG_LONG_CHARS)
-                        width = MAX_LONG_LONG_CHARS;
-                }
-                else
+            else if (longlongflag)
+                len = sprintf(buffer, fmt,
+                        va_arg(*vargs, unsigned PY_LONG_LONG));
 #endif
-                    /* MAX_LONG_CHARS is enough to hold a 64-bit integer,
-                       including sign.  Decimal takes the most space.  This
-                       isn't enough for octal.  If a width is specified we
-                       need more (which we allocate later). */
-                    if (width < MAX_LONG_CHARS)
-                        width = MAX_LONG_CHARS;
-
-                /* account for the size + '\0' to separate numbers
-                   inside of the numberresults buffer */
-                numbersize += (width + 1);
-            }
+            else if (size_tflag)
+                len = sprintf(buffer, fmt,
+                        va_arg(*vargs, size_t));
+            else
+                len = sprintf(buffer, fmt,
+                        va_arg(*vargs, unsigned int));
+        }
+        else if (*f == 'x') {
+            makefmt(fmt, 0, 0, 0, 'x');
+            len = sprintf(buffer, fmt, va_arg(*vargs, int));
+        }
+        else {
+            makefmt(fmt, longflag, longlongflag, size_tflag, *f);
+
+            if (longflag)
+                len = sprintf(buffer, fmt,
+                        va_arg(*vargs, long));
+#ifdef HAVE_LONG_LONG
+            else if (longlongflag)
+                len = sprintf(buffer, fmt,
+                        va_arg(*vargs, PY_LONG_LONG));
+#endif
+            else if (size_tflag)
+                len = sprintf(buffer, fmt,
+                        va_arg(*vargs, Py_ssize_t));
+            else
+                len = sprintf(buffer, fmt,
+                        va_arg(*vargs, int));
+        }
+        assert(len >= 0);
+
+        if (precision < len)
+            precision = len;
+
+        arglen = Py_MAX(precision, width);
+        if (_PyUnicodeWriter_Prepare(writer, arglen, 127) == -1)
+            return NULL;
+
+        if (width > precision) {
+            Py_UCS4 fillchar;
+            fill = width - precision;
+            fillchar = zeropad?'0':' ';
+            if (PyUnicode_Fill(writer->buffer, writer->pos, fill, fillchar) == -1)
+                return NULL;
+            writer->pos += fill;
+        }
+        if (precision > len) {
+            fill = precision - len;
+            if (PyUnicode_Fill(writer->buffer, writer->pos, fill, '0') == -1)
+                return NULL;
+            writer->pos += fill;
         }
-        else if ((unsigned char)*f > 127) {
-            PyErr_Format(PyExc_ValueError,
-                "PyUnicode_FromFormatV() expects an ASCII-encoded format "
-                "string, got a non-ASCII byte: 0x%02x",
-                (unsigned char)*f);
+
+        if (_PyUnicodeWriter_WriteASCIIString(writer, buffer, len) < 0)
             return NULL;
+        break;
+    }
+
+    case 'p':
+    {
+        char number[MAX_LONG_LONG_CHARS];
+
+        len = sprintf(number, "%p", va_arg(*vargs, void*));
+        assert(len >= 0);
+
+        /* %p is ill-defined:  ensure leading 0x. */
+        if (number[1] == 'X')
+            number[1] = 'x';
+        else if (number[1] != 'x') {
+            memmove(number + 2, number,
+                    strlen(number) + 1);
+            number[0] = '0';
+            number[1] = 'x';
+            len += 2;
         }
+
+        if (_PyUnicodeWriter_WriteASCIIString(writer, number, len) < 0)
+            return NULL;
+        break;
     }
-    /* step 2: allocate memory for the results of
-     * PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() calls */
-    if (callcount) {
-        callresults = PyObject_Malloc(sizeof(PyObject *) * callcount);
-        if (!callresults) {
-            PyErr_NoMemory();
+
+    case 's':
+    {
+        /* UTF-8 */
+        const char *s = va_arg(*vargs, const char*);
+        if (unicode_fromformat_write_cstr(writer, s, width, precision) < 0)
+            return NULL;
+        break;
+    }
+
+    case 'U':
+    {
+        PyObject *obj = va_arg(*vargs, PyObject *);
+        assert(obj && _PyUnicode_CHECK(obj));
+
+        if (unicode_fromformat_write_str(writer, obj, width, precision) == -1)
             return NULL;
+        break;
+    }
+
+    case 'V':
+    {
+        PyObject *obj = va_arg(*vargs, PyObject *);
+        const char *str = va_arg(*vargs, const char *);
+        if (obj) {
+            assert(_PyUnicode_CHECK(obj));
+            if (unicode_fromformat_write_str(writer, obj, width, precision) == -1)
+                return NULL;
         }
-        callresult = callresults;
+        else {
+            assert(str != NULL);
+            if (unicode_fromformat_write_cstr(writer, str, width, precision) < 0)
+                return NULL;
+        }
+        break;
     }
-    /* step 2.5: allocate memory for the results of formating numbers */
-    if (numbersize) {
-        numberresults = PyObject_Malloc(numbersize);
-        if (!numberresults) {
-            PyErr_NoMemory();
-            goto fail;
+
+    case 'S':
+    {
+        PyObject *obj = va_arg(*vargs, PyObject *);
+        PyObject *str;
+        assert(obj);
+        str = PyObject_Str(obj);
+        if (!str)
+            return NULL;
+        if (unicode_fromformat_write_str(writer, str, width, precision) == -1) {
+            Py_DECREF(str);
+            return NULL;
         }
-        numberresult = numberresults;
+        Py_DECREF(str);
+        break;
     }
 
-    /* step 3: format numbers and figure out how large a buffer we need */
-    for (f = format; *f; f++) {
-        if (*f == '%') {
-            const char* p;
-            int longflag;
-            int longlongflag;
-            int size_tflag;
-            int numprinted;
+    case 'R':
+    {
+        PyObject *obj = va_arg(*vargs, PyObject *);
+        PyObject *repr;
+        assert(obj);
+        repr = PyObject_Repr(obj);
+        if (!repr)
+            return NULL;
+        if (unicode_fromformat_write_str(writer, repr, width, precision) == -1) {
+            Py_DECREF(repr);
+            return NULL;
+        }
+        Py_DECREF(repr);
+        break;
+    }
 
-            p = f;
-            zeropad = (f[1] == '0');
-            f = parse_format_flags(f, &width, &precision,
-                                   &longflag, &longlongflag, &size_tflag);
-            switch (*f) {
-            case 'c':
-            {
-                int ordinal = va_arg(count, int);
-                if (ordinal < 0 || ordinal > MAX_UNICODE) {
-                    PyErr_SetString(PyExc_OverflowError,
-                                    "%c arg not in range(0x110000)");
-                    goto fail;
-                }
-                maxchar = Py_MAX(maxchar, (Py_UCS4)ordinal);
-                n++;
-                break;
-            }
-            case '%':
-                n++;
-                break;
-            case 'i':
-            case 'd':
-                makefmt(fmt, longflag, longlongflag, size_tflag, zeropad,
-                        width, precision, *f);
-                if (longflag)
-                    numprinted = sprintf(numberresult, fmt,
-                                         va_arg(count, long));
-#ifdef HAVE_LONG_LONG
-                else if (longlongflag)
-                    numprinted = sprintf(numberresult, fmt,
-                                         va_arg(count, PY_LONG_LONG));
-#endif
-                else if (size_tflag)
-                    numprinted = sprintf(numberresult, fmt,
-                                         va_arg(count, Py_ssize_t));
-                else
-                    numprinted = sprintf(numberresult, fmt,
-                                         va_arg(count, int));
-                n += numprinted;
-                /* advance by +1 to skip over the '\0' */
-                numberresult += (numprinted + 1);
-                assert(*(numberresult - 1) == '\0');
-                assert(*(numberresult - 2) != '\0');
-                assert(numprinted >= 0);
-                assert(numberresult <= numberresults + numbersize);
-                break;
-            case 'u':
-                makefmt(fmt, longflag, longlongflag, size_tflag, zeropad,
-                        width, precision, 'u');
-                if (longflag)
-                    numprinted = sprintf(numberresult, fmt,
-                                         va_arg(count, unsigned long));
-#ifdef HAVE_LONG_LONG
-                else if (longlongflag)
-                    numprinted = sprintf(numberresult, fmt,
-                                         va_arg(count, unsigned PY_LONG_LONG));
-#endif
-                else if (size_tflag)
-                    numprinted = sprintf(numberresult, fmt,
-                                         va_arg(count, size_t));
-                else
-                    numprinted = sprintf(numberresult, fmt,
-                                         va_arg(count, unsigned int));
-                n += numprinted;
-                numberresult += (numprinted + 1);
-                assert(*(numberresult - 1) == '\0');
-                assert(*(numberresult - 2) != '\0');
-                assert(numprinted >= 0);
-                assert(numberresult <= numberresults + numbersize);
-                break;
-            case 'x':
-                makefmt(fmt, 0, 0, 0, zeropad, width, precision, 'x');
-                numprinted = sprintf(numberresult, fmt, va_arg(count, int));
-                n += numprinted;
-                numberresult += (numprinted + 1);
-                assert(*(numberresult - 1) == '\0');
-                assert(*(numberresult - 2) != '\0');
-                assert(numprinted >= 0);
-                assert(numberresult <= numberresults + numbersize);
-                break;
-            case 'p':
-                numprinted = sprintf(numberresult, "%p", va_arg(count, void*));
-                /* %p is ill-defined:  ensure leading 0x. */
-                if (numberresult[1] == 'X')
-                    numberresult[1] = 'x';
-                else if (numberresult[1] != 'x') {
-                    memmove(numberresult + 2, numberresult,
-                            strlen(numberresult) + 1);
-                    numberresult[0] = '0';
-                    numberresult[1] = 'x';
-                    numprinted += 2;
-                }
-                n += numprinted;
-                numberresult += (numprinted + 1);
-                assert(*(numberresult - 1) == '\0');
-                assert(*(numberresult - 2) != '\0');
-                assert(numprinted >= 0);
-                assert(numberresult <= numberresults + numbersize);
-                break;
-            case 's':
-            {
-                /* UTF-8 */
-                const char *s = va_arg(count, const char*);
-                PyObject *str = PyUnicode_DecodeUTF8Stateful(s, strlen(s), "replace", NULL);
-                if (!str)
-                    goto fail;
-                /* since PyUnicode_DecodeUTF8 returns already flexible
-                   unicode objects, there is no need to call ready on them */
-                argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
-                maxchar = Py_MAX(maxchar, argmaxchar);
-                n += PyUnicode_GET_LENGTH(str);
-                /* Remember the str and switch to the next slot */
-                *callresult++ = str;
-                break;
-            }
-            case 'U':
-            {
-                PyObject *obj = va_arg(count, PyObject *);
-                assert(obj && _PyUnicode_CHECK(obj));
-                if (PyUnicode_READY(obj) == -1)
-                    goto fail;
-                argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
-                maxchar = Py_MAX(maxchar, argmaxchar);
-                n += PyUnicode_GET_LENGTH(obj);
-                break;
-            }
-            case 'V':
-            {
-                PyObject *obj = va_arg(count, PyObject *);
-                const char *str = va_arg(count, const char *);
-                PyObject *str_obj;
-                assert(obj || str);
-                assert(!obj || _PyUnicode_CHECK(obj));
-                if (obj) {
-                    if (PyUnicode_READY(obj) == -1)
-                        goto fail;
-                    argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
-                    maxchar = Py_MAX(maxchar, argmaxchar);
-                    n += PyUnicode_GET_LENGTH(obj);
-                    *callresult++ = NULL;
-                }
-                else {
-                    str_obj = PyUnicode_DecodeUTF8Stateful(str, strlen(str), "replace", NULL);
-                    if (!str_obj)
-                        goto fail;
-                    if (PyUnicode_READY(str_obj) == -1) {
-                        Py_DECREF(str_obj);
-                        goto fail;
-                    }
-                    argmaxchar = PyUnicode_MAX_CHAR_VALUE(str_obj);
-                    maxchar = Py_MAX(maxchar, argmaxchar);
-                    n += PyUnicode_GET_LENGTH(str_obj);
-                    *callresult++ = str_obj;
-                }
-                break;
-            }
-            case 'S':
-            {
-                PyObject *obj = va_arg(count, PyObject *);
-                PyObject *str;
-                assert(obj);
-                str = PyObject_Str(obj);
-                if (!str)
-                    goto fail;
-                if (PyUnicode_READY(str) == -1) {
-                    Py_DECREF(str);
-                    goto fail;
-                }
-                argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
-                maxchar = Py_MAX(maxchar, argmaxchar);
-                n += PyUnicode_GET_LENGTH(str);
-                /* Remember the str and switch to the next slot */
-                *callresult++ = str;
-                break;
-            }
-            case 'R':
-            {
-                PyObject *obj = va_arg(count, PyObject *);
-                PyObject *repr;
-                assert(obj);
-                repr = PyObject_Repr(obj);
-                if (!repr)
-                    goto fail;
-                if (PyUnicode_READY(repr) == -1) {
-                    Py_DECREF(repr);
-                    goto fail;
-                }
-                argmaxchar = PyUnicode_MAX_CHAR_VALUE(repr);
-                maxchar = Py_MAX(maxchar, argmaxchar);
-                n += PyUnicode_GET_LENGTH(repr);
-                /* Remember the repr and switch to the next slot */
-                *callresult++ = repr;
-                break;
-            }
-            case 'A':
-            {
-                PyObject *obj = va_arg(count, PyObject *);
-                PyObject *ascii;
-                assert(obj);
-                ascii = PyObject_ASCII(obj);
-                if (!ascii)
-                    goto fail;
-                if (PyUnicode_READY(ascii) == -1) {
-                    Py_DECREF(ascii);
-                    goto fail;
-                }
-                argmaxchar = PyUnicode_MAX_CHAR_VALUE(ascii);
-                maxchar = Py_MAX(maxchar, argmaxchar);
-                n += PyUnicode_GET_LENGTH(ascii);
-                /* Remember the repr and switch to the next slot */
-                *callresult++ = ascii;
-                break;
-            }
-            default:
-                /* if we stumble upon an unknown
-                   formatting code, copy the rest of
-                   the format string to the output
-                   string. (we cannot just skip the
-                   code, since there's no way to know
-                   what's in the argument list) */
-                n += strlen(p);
-                goto expand;
-            }
-        } else
-            n++;
-    }
-  expand:
-    /* step 4: fill the buffer */
-    /* Since we've analyzed how much space we need,
-       we don't have to resize the string.
-       There can be no errors beyond this point. */
-    string = PyUnicode_New(n, maxchar);
-    if (!string)
-        goto fail;
-    kind = PyUnicode_KIND(string);
-    data = PyUnicode_DATA(string);
-    callresult = callresults;
-    numberresult = numberresults;
+    case 'A':
+    {
+        PyObject *obj = va_arg(*vargs, PyObject *);
+        PyObject *ascii;
+        assert(obj);
+        ascii = PyObject_ASCII(obj);
+        if (!ascii)
+            return NULL;
+        if (unicode_fromformat_write_str(writer, ascii, width, precision) == -1) {
+            Py_DECREF(ascii);
+            return NULL;
+        }
+        Py_DECREF(ascii);
+        break;
+    }
 
-    for (i = 0, f = format; *f; f++) {
+    case '%':
+        if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0)
+            return NULL;
+        break;
+
+    default:
+        /* if we stumble upon an unknown formatting code, copy the rest
+           of the format string to the output string. (we cannot just
+           skip the code, since there's no way to know what's in the
+           argument list) */
+        len = strlen(p);
+        if (_PyUnicodeWriter_WriteLatin1String(writer, p, len) == -1)
+            return NULL;
+        f = p+len;
+        return f;
+    }
+
+    f++;
+    return f;
+}
+
+PyObject *
+PyUnicode_FromFormatV(const char *format, va_list vargs)
+{
+    va_list vargs2;
+    const char *f;
+    _PyUnicodeWriter writer;
+
+    _PyUnicodeWriter_Init(&writer);
+    writer.min_length = strlen(format) + 100;
+    writer.overallocate = 1;
+
+    /* va_list may be an array (of 1 item) on some platforms (ex: AMD64).
+       Copy it to be able to pass a reference to a subfunction. */
+    Py_VA_COPY(vargs2, vargs);
+
+    for (f = format; *f; ) {
         if (*f == '%') {
-            const char* p;
+            f = unicode_fromformat_arg(&writer, f, &vargs2);
+            if (f == NULL)
+                goto fail;
+        }
+        else {
+            const char *p;
+            Py_ssize_t len;
 
             p = f;
-            f = parse_format_flags(f, NULL, NULL, NULL, NULL, NULL);
-            /* checking for == because the last argument could be a empty
-               string, which causes i to point to end, the assert at the end of
-               the loop */
-            assert(i <= PyUnicode_GET_LENGTH(string));
-
-            switch (*f) {
-            case 'c':
-            {
-                const int ordinal = va_arg(vargs, int);
-                PyUnicode_WRITE(kind, data, i++, ordinal);
-                break;
-            }
-            case 'i':
-            case 'd':
-            case 'u':
-            case 'x':
-            case 'p':
-            {
-                Py_ssize_t len;
-                /* unused, since we already have the result */
-                if (*f == 'p')
-                    (void) va_arg(vargs, void *);
-                else
-                    (void) va_arg(vargs, int);
-                /* extract the result from numberresults and append. */
-                len = strlen(numberresult);
-                unicode_write_cstr(string, i, numberresult, len);
-                /* skip over the separating '\0' */
-                i += len;
-                numberresult += len;
-                assert(*numberresult == '\0');
-                numberresult++;
-                assert(numberresult <= numberresults + numbersize);
-                break;
-            }
-            case 's':
-            {
-                /* unused, since we already have the result */
-                Py_ssize_t size;
-                (void) va_arg(vargs, char *);
-                size = PyUnicode_GET_LENGTH(*callresult);
-                assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
-                _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
-                i += size;
-                /* We're done with the unicode()/repr() => forget it */
-                Py_DECREF(*callresult);
-                /* switch to next unicode()/repr() result */
-                ++callresult;
-                break;
-            }
-            case 'U':
-            {
-                PyObject *obj = va_arg(vargs, PyObject *);
-                Py_ssize_t size;
-                assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
-                size = PyUnicode_GET_LENGTH(obj);
-                _PyUnicode_FastCopyCharacters(string, i, obj, 0, size);
-                i += size;
-                break;
-            }
-            case 'V':
+            do
             {
-                Py_ssize_t size;
-                PyObject *obj = va_arg(vargs, PyObject *);
-                va_arg(vargs, const char *);
-                if (obj) {
-                    size = PyUnicode_GET_LENGTH(obj);
-                    assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
-                    _PyUnicode_FastCopyCharacters(string, i, obj, 0, size);
-                    i += size;
-                } else {
-                    size = PyUnicode_GET_LENGTH(*callresult);
-                    assert(PyUnicode_KIND(*callresult) <=
-                           PyUnicode_KIND(string));
-                    _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
-                    i += size;
-                    Py_DECREF(*callresult);
+                if ((unsigned char)*p > 127) {
+                    PyErr_Format(PyExc_ValueError,
+                        "PyUnicode_FromFormatV() expects an ASCII-encoded format "
+                        "string, got a non-ASCII byte: 0x%02x",
+                        (unsigned char)*p);
+                    goto fail; /* release the writer buffer before returning NULL */
                 }
-                ++callresult;
-                break;
-            }
-            case 'S':
-            case 'R':
-            case 'A':
-            {
-                Py_ssize_t size = PyUnicode_GET_LENGTH(*callresult);
-                /* unused, since we already have the result */
-                (void) va_arg(vargs, PyObject *);
-                assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
-                _PyUnicode_FastCopyCharacters(string, i, *callresult, 0,  size);
-                i += size;
-                /* We're done with the unicode()/repr() => forget it */
-                Py_DECREF(*callresult);
-                /* switch to next unicode()/repr() result */
-                ++callresult;
-                break;
+                p++;
             }
-            case '%':
-                PyUnicode_WRITE(kind, data, i++, '%');
-                break;
-            default:
-            {
-                Py_ssize_t len = strlen(p);
-                unicode_write_cstr(string, i, p, len);
-                i += len;
-                assert(i == PyUnicode_GET_LENGTH(string));
-                goto end;
-            }
-            }
-        }
-        else {
-            assert(i < PyUnicode_GET_LENGTH(string));
-            PyUnicode_WRITE(kind, data, i++, *f);
+            while (*p != '\0' && *p != '%');
+            len = p - f;
+
+            if (*p == '\0')
+                writer.overallocate = 0;
+
+            if (_PyUnicodeWriter_WriteASCIIString(&writer, f, len) < 0)
+                goto fail;
+
+            f = p;
         }
     }
-    assert(i == PyUnicode_GET_LENGTH(string));
+    return _PyUnicodeWriter_Finish(&writer);
 
-  end:
-    if (callresults)
-        PyObject_Free(callresults);
-    if (numberresults)
-        PyObject_Free(numberresults);
-    return unicode_result(string);
   fail:
-    if (callresults) {
-        PyObject **callresult2 = callresults;
-        while (callresult2 < callresult) {
-            Py_XDECREF(*callresult2);
-            ++callresult2;
-        }
-        PyObject_Free(callresults);
-    }
-    if (numberresults)
-        PyObject_Free(numberresults);
+    _PyUnicodeWriter_Dealloc(&writer);
     return NULL;
 }
 
@@ -2943,12 +2857,7 @@ PyUnicode_AsWideCharString(PyObject *unicode,
     buflen = unicode_aswidechar(unicode, NULL, 0);
     if (buflen == -1)
         return NULL;
-    if (PY_SSIZE_T_MAX / sizeof(wchar_t) < buflen) {
-        PyErr_NoMemory();
-        return NULL;
-    }
-
-    buffer = PyMem_MALLOC(buflen * sizeof(wchar_t));
+    buffer = PyMem_NEW(wchar_t, buflen);
     if (buffer == NULL) {
         PyErr_NoMemory();
         return NULL;
@@ -2968,26 +2877,17 @@ PyUnicode_AsWideCharString(PyObject *unicode,
 PyObject *
 PyUnicode_FromOrdinal(int ordinal)
 {
-    PyObject *v;
     if (ordinal < 0 || ordinal > MAX_UNICODE) {
         PyErr_SetString(PyExc_ValueError,
                         "chr() arg not in range(0x110000)");
         return NULL;
     }
 
-    if ((Py_UCS4)ordinal < 256)
-        return get_latin1_char((unsigned char)ordinal);
-
-    v = PyUnicode_New(1, ordinal);
-    if (v == NULL)
-        return NULL;
-    PyUnicode_WRITE(PyUnicode_KIND(v), PyUnicode_DATA(v), 0, ordinal);
-    assert(_PyUnicode_CheckConsistency(v, 1));
-    return v;
+    return unicode_char((Py_UCS4)ordinal); /* ordinal already checked to be in 0..MAX_UNICODE */
 }
 
 PyObject *
-PyUnicode_FromObject(register PyObject *obj)
+PyUnicode_FromObject(PyObject *obj)
 {
     /* XXX Perhaps we should make this API an alias of
        PyObject_Str() instead ?! */
@@ -3009,7 +2909,7 @@ PyUnicode_FromObject(register PyObject *obj)
 }
 
 PyObject *
-PyUnicode_FromEncodedObject(register PyObject *obj,
+PyUnicode_FromEncodedObject(PyObject *obj,
                             const char *encoding,
                             const char *errors)
 {
@@ -3040,8 +2940,7 @@ PyUnicode_FromEncodedObject(register PyObject *obj,
     /* Retrieve a bytes buffer view through the PEP 3118 buffer interface */
     if (PyObject_GetBuffer(obj, &buffer, PyBUF_SIMPLE) < 0) {
         PyErr_Format(PyExc_TypeError,
-                     "coercing to str: need bytes, bytearray "
-                     "or buffer-like object, %.80s found",
+                     "coercing to str: need a bytes-like object, %.80s found",
                      Py_TYPE(obj)->tp_name);
         return NULL;
     }
@@ -3061,14 +2960,17 @@ PyUnicode_FromEncodedObject(register PyObject *obj,
    1 on success. */
 int
 _Py_normalize_encoding(const char *encoding,
-                   char *lower,
-                   size_t lower_len)
+                       char *lower,
+                       size_t lower_len)
 {
     const char *e;
     char *l;
     char *l_end;
 
     if (encoding == NULL) {
+        /* 6 == strlen("utf-8") + 1 */
+        if (lower_len < 6)
+            return 0;
         strcpy(lower, "utf-8");
         return 1;
     }
@@ -3110,7 +3012,8 @@ PyUnicode_Decode(const char *s,
             return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL);
         else if ((strcmp(lower, "latin-1") == 0) ||
                  (strcmp(lower, "latin1") == 0) ||
-                 (strcmp(lower, "iso-8859-1") == 0))
+                 (strcmp(lower, "iso-8859-1") == 0) ||
+                 (strcmp(lower, "iso8859-1") == 0))
             return PyUnicode_DecodeLatin1(s, size, errors);
 #ifdef HAVE_MBCS
         else if (strcmp(lower, "mbcs") == 0)
@@ -3136,8 +3039,10 @@ PyUnicode_Decode(const char *s,
         goto onError;
     if (!PyUnicode_Check(unicode)) {
         PyErr_Format(PyExc_TypeError,
-                     "decoder did not return a str object (type=%.400s)",
-                     Py_TYPE(unicode)->tp_name);
+                     "'%.400s' decoder returned '%.400s' instead of 'str'; "
+                     "use codecs.decode() to decode to arbitrary types",
+                     encoding,
+                     Py_TYPE(unicode)->tp_name); /* one argument per %s conversion */
         Py_DECREF(unicode);
         goto onError;
     }
@@ -3195,8 +3100,10 @@ PyUnicode_AsDecodedUnicode(PyObject *unicode,
         goto onError;
     if (!PyUnicode_Check(v)) {
         PyErr_Format(PyExc_TypeError,
-                     "decoder did not return a str object (type=%.400s)",
-                     Py_TYPE(v)->tp_name);
+                     "'%.400s' decoder returned '%.400s' instead of 'str'; "
+                     "use codecs.decode() to decode to arbitrary types",
+                     encoding,
+                     Py_TYPE(v)->tp_name); /* report the decoder's result (v), not the input */
         Py_DECREF(v);
         goto onError;
     }
@@ -3344,7 +3251,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
     }
 
     if (surrogateescape) {
-        /* locale encoding with surrogateescape */
+        /* "surrogateescape" error handler */
         char *str;
 
         str = _Py_wchar2char(wstr, &error_pos);
@@ -3364,6 +3271,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
         PyMem_Free(str);
     }
     else {
+        /* strict mode */
         size_t len, len2;
 
         len = wcstombs(NULL, wstr, 0);
@@ -3402,7 +3310,7 @@ encode_error:
         wstr = _Py_char2wchar(errmsg, &errlen);
         if (wstr != NULL) {
             reason = PyUnicode_FromWideChar(wstr, errlen);
-            PyMem_Free(wstr);
+            PyMem_RawFree(wstr);
         } else
             errmsg = NULL;
     }
@@ -3480,7 +3388,8 @@ PyUnicode_AsEncodedString(PyObject *unicode,
         }
         else if ((strcmp(lower, "latin-1") == 0) ||
                  (strcmp(lower, "latin1") == 0) ||
-                 (strcmp(lower, "iso-8859-1") == 0))
+                 (strcmp(lower, "iso-8859-1") == 0) ||
+                 (strcmp(lower, "iso8859-1") == 0))
             return _PyUnicode_AsLatin1String(unicode, errors);
 #ifdef HAVE_MBCS
         else if (strcmp(lower, "mbcs") == 0)
@@ -3505,7 +3414,8 @@ PyUnicode_AsEncodedString(PyObject *unicode,
         PyObject *b;
 
         error = PyErr_WarnFormat(PyExc_RuntimeWarning, 1,
-            "encoder %s returned bytearray instead of bytes",
+            "encoder %s returned bytearray instead of bytes; "
+            "use codecs.encode() to encode to arbitrary types",
             encoding);
         if (error) {
             Py_DECREF(v);
@@ -3518,8 +3428,10 @@ PyUnicode_AsEncodedString(PyObject *unicode,
     }
 
     PyErr_Format(PyExc_TypeError,
-                 "encoder did not return a bytes object (type=%.400s)",
-                 Py_TYPE(v)->tp_name);
+                 "'%.400s' encoder returned '%.400s' instead of 'bytes'; "
+                 "use codecs.encode() to encode to arbitrary types",
+                 encoding,
+                 Py_TYPE(v)->tp_name); /* one argument per %s conversion */
     Py_DECREF(v);
     return NULL;
 }
@@ -3545,8 +3457,10 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
         goto onError;
     if (!PyUnicode_Check(v)) {
         PyErr_Format(PyExc_TypeError,
-                     "encoder did not return an str object (type=%.400s)",
-                     Py_TYPE(v)->tp_name);
+                     "'%.400s' encoder returned '%.400s' instead of 'str'; "
+                     "use codecs.encode() to encode to arbitrary types",
+                     encoding,
+                     Py_TYPE(v)->tp_name); /* one argument per %s conversion */
         Py_DECREF(v);
         goto onError;
     }
@@ -3609,8 +3523,8 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
         return NULL;
     }
 
-    if (surrogateescape)
-    {
+    if (surrogateescape) {
+        /* "surrogateescape" error handler */
         wstr = _Py_char2wchar(str, &wlen);
         if (wstr == NULL) {
             if (wlen == (size_t)-1)
@@ -3621,9 +3535,10 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
         }
 
         unicode = PyUnicode_FromWideChar(wstr, wlen);
-        PyMem_Free(wstr);
+        PyMem_RawFree(wstr);
     }
     else {
+        /* strict mode */
 #ifndef HAVE_BROKEN_MBSTOWCS
         wlen = mbstowcs(NULL, str, 0);
 #else
@@ -3635,15 +3550,11 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
             wstr = smallbuf;
         }
         else {
-            if (wlen > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1)
-                return PyErr_NoMemory();
-
-            wstr = PyMem_Malloc((wlen+1) * sizeof(wchar_t));
+            wstr = PyMem_New(wchar_t, wlen+1);
             if (!wstr)
                 return PyErr_NoMemory();
         }
 
-        /* This shouldn't fail now */
         wlen2 = mbstowcs(wstr, str, wlen+1);
         if (wlen2 == (size_t)-1) {
             if (wstr != smallbuf)
@@ -3660,6 +3571,7 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
     return unicode;
 
 decode_error:
+    reason = NULL;
     errmsg = strerror(errno);
     assert(errmsg != NULL);
 
@@ -3669,11 +3581,10 @@ decode_error:
         wstr = _Py_char2wchar(errmsg, &errlen);
         if (wstr != NULL) {
             reason = PyUnicode_FromWideChar(wstr, errlen);
-            PyMem_Free(wstr);
-        } else
-            errmsg = NULL;
+            PyMem_RawFree(wstr);
+        }
     }
-    if (errmsg == NULL)
+    if (reason == NULL)
         reason = PyUnicode_FromString(
             "mbstowcs() encountered an invalid multibyte sequence");
     if (reason == NULL)
@@ -3737,18 +3648,20 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
 
 
 int
-_PyUnicode_HasNULChars(PyObject* s)
+_PyUnicode_HasNULChars(PyObject* str)
 {
-    static PyObject *nul = NULL;
+    Py_ssize_t pos;
 
-    if (nul == NULL)
-        nul = PyUnicode_FromStringAndSize("\0", 1);
-    if (nul == NULL)
+    if (PyUnicode_READY(str) == -1)
         return -1;
-    return PyUnicode_Contains(s, nul);
+    pos = findchar(PyUnicode_DATA(str), PyUnicode_KIND(str),
+                   PyUnicode_GET_LENGTH(str), '\0', 1); /* look for '\0' over the full length of str */
+    if (pos == -1)
+        return 0; /* findchar() gave -1: reported as "no NUL character" */
+    else
+        return 1; /* any other result: str contains a NUL character */
 }
 
-
 int
 PyUnicode_FSConverter(PyObject* arg, void* addr)
 {
@@ -3852,6 +3765,7 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
             return NULL;
         _PyUnicode_UTF8(unicode) = PyObject_MALLOC(PyBytes_GET_SIZE(bytes) + 1);
         if (_PyUnicode_UTF8(unicode) == NULL) {
+            PyErr_NoMemory();
             Py_DECREF(bytes);
             return NULL;
         }
@@ -3941,6 +3855,11 @@ PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)
 #endif
         }
         else {
+            if ((size_t)_PyUnicode_LENGTH(unicode) >
+                    PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
+                PyErr_NoMemory();
+                return NULL;
+            }
             _PyUnicode_WSTR(unicode) = (wchar_t *) PyObject_MALLOC(sizeof(wchar_t) *
                                                   (_PyUnicode_LENGTH(unicode) + 1));
             if (!_PyUnicode_WSTR(unicode)) {
@@ -4020,6 +3939,9 @@ PyUnicode_GetLength(PyObject *unicode)
 Py_UCS4
 PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index)
 {
+    void *data;
+    int kind;
+
     if (!PyUnicode_Check(unicode) || PyUnicode_READY(unicode) == -1) {
         PyErr_BadArgument();
         return (Py_UCS4)-1;
@@ -4028,7 +3950,9 @@ PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index)
         PyErr_SetString(PyExc_IndexError, "string index out of range");
         return (Py_UCS4)-1;
     }
-    return PyUnicode_READ_CHAR(unicode, index);
+    data = PyUnicode_DATA(unicode);
+    kind = PyUnicode_KIND(unicode);
+    return PyUnicode_READ(kind, data, index);
 }
 
 int
@@ -4086,6 +4010,7 @@ onError:
     Py_CLEAR(*exceptionObject);
 }
 
+#ifdef HAVE_MBCS
 /* error handling callback helper:
    build arguments, call the callback and check the arguments,
    if no exception occurred, copy the replacement to the output
@@ -4094,11 +4019,12 @@ onError:
 */
 
 static int
-unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
-                                 const char *encoding, const char *reason,
-                                 const char **input, const char **inend, Py_ssize_t *startinpos,
-                                 Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
-                                 PyObject **output, Py_ssize_t *outpos)
+unicode_decode_call_errorhandler_wchar(
+    const char *errors, PyObject **errorHandler,
+    const char *encoding, const char *reason,
+    const char **input, const char **inend, Py_ssize_t *startinpos,
+    Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
+    PyObject **output, Py_ssize_t *outpos)
 {
     static char *argparse = "O!n;decoding error handler must return (str, int) tuple";
 
@@ -4109,12 +4035,11 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
     Py_ssize_t requiredsize;
     Py_ssize_t newpos;
     PyObject *inputobj = NULL;
-    int res = -1;
+    wchar_t *repwstr;
+    Py_ssize_t repwlen;
 
-    if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND)
-        outsize = PyUnicode_GET_LENGTH(*output);
-    else
-        outsize = _PyUnicode_WSTR_LENGTH(*output);
+    assert (_PyUnicode_KIND(*output) == PyUnicode_WCHAR_KIND);
+    outsize = _PyUnicode_WSTR_LENGTH(*output);
 
     if (*errorHandler == NULL) {
         *errorHandler = PyCodec_LookupError(errors);
@@ -4139,8 +4064,6 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
     }
     if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos))
         goto onError;
-    if (PyUnicode_READY(repunicode) == -1)
-        goto onError;
 
     /* Copy back the bytes variables, which might have been modified by the
        callback */
@@ -4164,71 +4087,131 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
         goto onError;
     }
 
-    if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND) {
-        /* need more space? (at least enough for what we
-           have+the replacement+the rest of the string (starting
-           at the new input position), so we won't have to check space
-           when there are no errors in the rest of the string) */
-        Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode);
-        requiredsize = *outpos;
-        if (requiredsize > PY_SSIZE_T_MAX - replen)
-            goto overflow;
-        requiredsize += replen;
-        if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos))
-            goto overflow;
-        requiredsize += insize - newpos;
-        if (requiredsize > outsize) {
-            if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize)
-                requiredsize = 2*outsize;
-            if (unicode_resize(output, requiredsize) < 0)
-                goto onError;
-        }
-        if (unicode_widen(output, *outpos,
-                          PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0)
-            goto onError;
-        _PyUnicode_FastCopyCharacters(*output, *outpos, repunicode, 0, replen);
-        *outpos += replen;
-    }
-    else {
-        wchar_t *repwstr;
-        Py_ssize_t repwlen;
-        repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen);
-        if (repwstr == NULL)
+    repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen);
+    if (repwstr == NULL)
+        goto onError;
+    /* need more space? (at least enough for what we
+       have+the replacement+the rest of the string (starting
+       at the new input position), so we won't have to check space
+       when there are no errors in the rest of the string) */
+    requiredsize = *outpos;
+    if (requiredsize > PY_SSIZE_T_MAX - repwlen)
+        goto overflow;
+    requiredsize += repwlen;
+    if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos))
+        goto overflow;
+    requiredsize += insize - newpos;
+    if (requiredsize > outsize) {
+        if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize)
+            requiredsize = 2*outsize;
+        if (unicode_resize(output, requiredsize) < 0)
             goto onError;
-        /* need more space? (at least enough for what we
-           have+the replacement+the rest of the string (starting
-           at the new input position), so we won't have to check space
-           when there are no errors in the rest of the string) */
-        requiredsize = *outpos;
-        if (requiredsize > PY_SSIZE_T_MAX - repwlen)
-            goto overflow;
-        requiredsize += repwlen;
-        if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos))
-            goto overflow;
-        requiredsize += insize - newpos;
-        if (requiredsize > outsize) {
-            if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize)
-                requiredsize = 2*outsize;
-            if (unicode_resize(output, requiredsize) < 0)
-                goto onError;
-        }
-        wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen);
-        *outpos += repwlen;
     }
+    wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen);
+    *outpos += repwlen;
     *endinpos = newpos;
     *inptr = *input + newpos;
 
     /* we made it! */
-    res = 0;
-
-  onError:
     Py_XDECREF(restuple);
-    return res;
+    return 0;
 
   overflow:
     PyErr_SetString(PyExc_OverflowError,
                     "decoded result is too long for a Python string");
-    goto onError;
+
+  onError:
+    Py_XDECREF(restuple);
+    return -1;
+}
+#endif   /* HAVE_MBCS */
+
+static int
+unicode_decode_call_errorhandler_writer(
+    const char *errors, PyObject **errorHandler,
+    const char *encoding, const char *reason,
+    const char **input, const char **inend, Py_ssize_t *startinpos,
+    Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
+    _PyUnicodeWriter *writer /* PyObject **output, Py_ssize_t *outpos */)
+{
+    static char *argparse = "O!n;decoding error handler must return (str, int) tuple";
+
+    PyObject *restuple = NULL;
+    PyObject *repunicode = NULL;
+    Py_ssize_t insize;
+    Py_ssize_t newpos;
+    Py_ssize_t replen;
+    PyObject *inputobj = NULL;
+
+    if (*errorHandler == NULL) {
+        *errorHandler = PyCodec_LookupError(errors);
+        if (*errorHandler == NULL)
+            goto onError;
+    }
+
+    make_decode_exception(exceptionObject,
+        encoding,
+        *input, *inend - *input,
+        *startinpos, *endinpos,
+        reason);
+    if (*exceptionObject == NULL)
+        goto onError;
+
+    restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL);
+    if (restuple == NULL)
+        goto onError;
+    if (!PyTuple_Check(restuple)) {
+        PyErr_SetString(PyExc_TypeError, &argparse[4]);
+        goto onError;
+    }
+    if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos))
+        goto onError;
+
+    /* Copy back the bytes variables; the callback may have modified them. */
+    inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject);
+    if (!inputobj)
+        goto onError;
+    if (!PyBytes_Check(inputobj)) {
+        PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes");
+        Py_DECREF(inputobj); /* drop our reference; must not fall through to PyBytes_AS_STRING */
+        goto onError;
+    }
+    *input = PyBytes_AS_STRING(inputobj);
+    insize = PyBytes_GET_SIZE(inputobj);
+    *inend = *input + insize;
+    /* Safe to DECREF: the exception still holds a reference to the object. */
+    Py_DECREF(inputobj);
+
+    if (newpos<0)
+        newpos = insize+newpos;
+    if (newpos<0 || newpos>insize) {
+        PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", newpos);
+        goto onError;
+    }
+
+    if (PyUnicode_READY(repunicode) < 0)
+        goto onError;
+    replen = PyUnicode_GET_LENGTH(repunicode);
+    if (replen > 1) {
+        writer->min_length += replen - 1;
+        writer->overallocate = 1;
+        if (_PyUnicodeWriter_Prepare(writer, writer->min_length,
+                            PyUnicode_MAX_CHAR_VALUE(repunicode)) == -1)
+            goto onError;
+    }
+    if (_PyUnicodeWriter_WriteStr(writer, repunicode) == -1)
+        goto onError;
+
+    *endinpos = newpos;
+    *inptr = *input + newpos;
+
+    /* we made it! */
+    Py_XDECREF(restuple);
+    return 0;
+
+  onError:
+    Py_XDECREF(restuple);
+    return -1;
 }
 
 /* --- UTF-7 Codec -------------------------------------------------------- */
@@ -4336,9 +4319,8 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
     const char *starts = s;
     Py_ssize_t startinpos;
     Py_ssize_t endinpos;
-    Py_ssize_t outpos;
     const char *e;
-    PyObject *unicode;
+    _PyUnicodeWriter writer;
     const char *errmsg = "";
     int inShift = 0;
     Py_ssize_t shiftOutStart;
@@ -4348,17 +4330,17 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
 
-    /* Start off assuming it's all ASCII. Widen later as necessary. */
-    unicode = PyUnicode_New(size, 127);
-    if (!unicode)
-        return NULL;
     if (size == 0) {
         if (consumed)
             *consumed = 0;
-        return unicode;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
-    shiftOutStart = outpos = 0;
+    /* Start off assuming it's all ASCII. Widen later as necessary. */
+    _PyUnicodeWriter_Init(&writer);
+    writer.min_length = size;
+
+    shiftOutStart = 0;
     e = s + size;
 
     while (s < e) {
@@ -4381,13 +4363,13 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
                         /* expecting a second surrogate */
                         if (Py_UNICODE_IS_LOW_SURROGATE(outCh)) {
                             Py_UCS4 ch2 = Py_UNICODE_JOIN_SURROGATES(surrogate, outCh);
-                            if (unicode_putchar(&unicode, &outpos, ch2) < 0)
+                            if (_PyUnicodeWriter_WriteCharInline(&writer, ch2) < 0)
                                 goto onError;
                             surrogate = 0;
                             continue;
                         }
                         else {
-                            if (unicode_putchar(&unicode, &outpos, surrogate) < 0)
+                            if (_PyUnicodeWriter_WriteCharInline(&writer, surrogate) < 0)
                                 goto onError;
                             surrogate = 0;
                         }
@@ -4397,38 +4379,38 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
                         surrogate = outCh;
                     }
                     else {
-                        if (unicode_putchar(&unicode, &outpos, outCh) < 0)
+                        if (_PyUnicodeWriter_WriteCharInline(&writer, outCh) < 0)
                             goto onError;
                     }
                 }
             }
             else { /* now leaving a base-64 section */
                 inShift = 0;
-                s++;
-                if (surrogate) {
-                    if (unicode_putchar(&unicode, &outpos, surrogate) < 0)
-                        goto onError;
-                    surrogate = 0;
-                }
                 if (base64bits > 0) { /* left-over bits */
                     if (base64bits >= 6) {
                         /* We've seen at least one base-64 character */
+                        s++;
                         errmsg = "partial character in shift sequence";
                         goto utf7Error;
                     }
                     else {
                         /* Some bits remain; they should be zero */
                         if (base64buffer != 0) {
+                            s++;
                             errmsg = "non-zero padding bits in shift sequence";
                             goto utf7Error;
                         }
                     }
                 }
-                if (ch != '-') {
+                if (surrogate && DECODE_DIRECT(ch)) {
+                    if (_PyUnicodeWriter_WriteCharInline(&writer, surrogate) < 0)
+                        goto onError;
+                }
+                surrogate = 0;
+                if (ch == '-') {
                     /* '-' is absorbed; other terminating
                        characters are preserved */
-                    if (unicode_putchar(&unicode, &outpos, ch) < 0)
-                        goto onError;
+                    s++;
                 }
             }
         }
@@ -4437,20 +4419,21 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
             s++; /* consume '+' */
             if (s < e && *s == '-') { /* '+-' encodes '+' */
                 s++;
-                if (unicode_putchar(&unicode, &outpos, '+') < 0)
+                if (_PyUnicodeWriter_WriteCharInline(&writer, '+') < 0)
                     goto onError;
             }
             else { /* begin base64-encoded section */
                 inShift = 1;
-                shiftOutStart = outpos;
+                surrogate = 0;
+                shiftOutStart = writer.pos;
                 base64bits = 0;
                 base64buffer = 0;
             }
         }
         else if (DECODE_DIRECT(ch)) { /* character decodes as itself */
-            if (unicode_putchar(&unicode, &outpos, ch) < 0)
-                goto onError;
             s++;
+            if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
+                goto onError;
         }
         else {
             startinpos = s-starts;
@@ -4461,11 +4444,11 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
         continue;
 utf7Error:
         endinpos = s-starts;
-        if (unicode_decode_call_errorhandler(
+        if (unicode_decode_call_errorhandler_writer(
                 errors, &errorHandler,
                 "utf7", errmsg,
                 &starts, &e, &startinpos, &endinpos, &exc, &s,
-                &unicode, &outpos))
+                &writer))
             goto onError;
     }
 
@@ -4473,15 +4456,16 @@ utf7Error:
 
     if (inShift && !consumed) { /* in shift sequence, no more to follow */
         /* if we're in an inconsistent state, that's an error */
+        inShift = 0;
         if (surrogate ||
                 (base64bits >= 6) ||
                 (base64bits > 0 && base64buffer != 0)) {
             endinpos = size;
-            if (unicode_decode_call_errorhandler(
+            if (unicode_decode_call_errorhandler_writer(
                     errors, &errorHandler,
                     "utf7", "unterminated shift sequence",
                     &starts, &e, &startinpos, &endinpos, &exc, &s,
-                    &unicode, &outpos))
+                    &writer))
                 goto onError;
             if (s < e)
                 goto restart;
@@ -4492,32 +4476,29 @@ utf7Error:
     if (consumed) {
         if (inShift) {
             *consumed = startinpos;
-            if (outpos != shiftOutStart &&
-                PyUnicode_MAX_CHAR_VALUE(unicode) > 127) {
+            if (writer.pos != shiftOutStart && writer.maxchar > 127) {
                 PyObject *result = PyUnicode_FromKindAndData(
-                        PyUnicode_KIND(unicode), PyUnicode_DATA(unicode),
-                        shiftOutStart);
-                Py_DECREF(unicode);
-                unicode = result;
+                        writer.kind, writer.data, shiftOutStart);
+                Py_XDECREF(errorHandler);
+                Py_XDECREF(exc);
+                _PyUnicodeWriter_Dealloc(&writer);
+                return result;
             }
-            outpos = shiftOutStart; /* back off output */
+            writer.pos = shiftOutStart; /* back off output */
         }
         else {
             *consumed = s-starts;
         }
     }
 
-    if (unicode_resize(&unicode, outpos) < 0)
-        goto onError;
-
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    return unicode_result(unicode);
+    return _PyUnicodeWriter_Finish(&writer);
 
   onError:
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    Py_DECREF(unicode);
+    _PyUnicodeWriter_Dealloc(&writer);
     return NULL;
 }
 
@@ -4600,7 +4581,7 @@ encode_char:
 
             /* code first surrogate */
             base64bits += 16;
-            base64buffer = (base64buffer << 16) | 0xd800 | ((ch-0x10000) >> 10);
+            base64buffer = (base64buffer << 16) | Py_UNICODE_HIGH_SURROGATE(ch);
             while (base64bits >= 6) {
                 *out++ = TO_BASE64(base64buffer >> (base64bits-6));
                 base64bits -= 6;
@@ -4701,9 +4682,9 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
     if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
         /* Fast path, see in STRINGLIB(utf8_decode) for
            an explanation. */
-        /* Help register allocation */
-        register const char *_p = p;
-        register Py_UCS1 * q = dest;
+        /* Help register allocation */
+        const char *_p = p;
+        Py_UCS1 * q = dest;
         while (_p < aligned_end) {
             unsigned long value = *(const unsigned long *) _p;
             if (value & ASCII_CHAR_MASK)
@@ -4726,8 +4707,8 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
         /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
            for an explanation. */
         if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
-            /* Help register allocation */
-            register const char *_p = p;
+            /* Help register allocation */
+            const char *_p = p;
             while (_p < aligned_end) {
                 unsigned long value = *(unsigned long *) _p;
                 if (value & ASCII_CHAR_MASK)
@@ -4752,10 +4733,9 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
                              const char *errors,
                              Py_ssize_t *consumed)
 {
-    PyObject *unicode;
+    _PyUnicodeWriter writer;
     const char *starts = s;
     const char *end = s + size;
-    Py_ssize_t outpos;
 
     Py_ssize_t startinpos;
     Py_ssize_t endinpos;
@@ -4776,29 +4756,26 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
         return get_latin1_char((unsigned char)s[0]);
     }
 
-    unicode = PyUnicode_New(size, 127);
-    if (!unicode)
-        return NULL;
+    _PyUnicodeWriter_Init(&writer);
+    writer.min_length = size;
+    if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1)
+        goto onError;
 
-    outpos = ascii_decode(s, end, PyUnicode_1BYTE_DATA(unicode));
-    s += outpos;
+    writer.pos = ascii_decode(s, end, writer.data);
+    s += writer.pos;
     while (s < end) {
         Py_UCS4 ch;
-        int kind = PyUnicode_KIND(unicode);
+        int kind = writer.kind;
         if (kind == PyUnicode_1BYTE_KIND) {
-            if (PyUnicode_IS_ASCII(unicode))
-                ch = asciilib_utf8_decode(&s, end,
-                        PyUnicode_1BYTE_DATA(unicode), &outpos);
+            if (PyUnicode_IS_ASCII(writer.buffer))
+                ch = asciilib_utf8_decode(&s, end, writer.data, &writer.pos);
             else
-                ch = ucs1lib_utf8_decode(&s, end,
-                        PyUnicode_1BYTE_DATA(unicode), &outpos);
+                ch = ucs1lib_utf8_decode(&s, end, writer.data, &writer.pos);
         } else if (kind == PyUnicode_2BYTE_KIND) {
-            ch = ucs2lib_utf8_decode(&s, end,
-                    PyUnicode_2BYTE_DATA(unicode), &outpos);
+            ch = ucs2lib_utf8_decode(&s, end, writer.data, &writer.pos);
         } else {
             assert(kind == PyUnicode_4BYTE_KIND);
-            ch = ucs4lib_utf8_decode(&s, end,
-                    PyUnicode_4BYTE_DATA(unicode), &outpos);
+            ch = ucs4lib_utf8_decode(&s, end, writer.data, &writer.pos);
         }
 
         switch (ch) {
@@ -4822,35 +4799,31 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
             endinpos = startinpos + ch - 1;
             break;
         default:
-            if (unicode_putchar(&unicode, &outpos, ch) < 0)
+            if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
                 goto onError;
             continue;
         }
 
-        if (unicode_decode_call_errorhandler(
+        if (unicode_decode_call_errorhandler_writer(
                 errors, &errorHandler,
                 "utf-8", errmsg,
                 &starts, &end, &startinpos, &endinpos, &exc, &s,
-                &unicode, &outpos))
+                &writer))
             goto onError;
     }
 
 End:
-    if (unicode_resize(&unicode, outpos) < 0)
-        goto onError;
-
     if (consumed)
         *consumed = s - starts;
 
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    assert(_PyUnicode_CheckConsistency(unicode, 1));
-    return unicode;
+    return _PyUnicodeWriter_Finish(&writer);
 
 onError:
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    Py_XDECREF(unicode);
+    _PyUnicodeWriter_Dealloc(&writer);
     return NULL;
 }
 
@@ -4860,7 +4833,7 @@ onError:
    used to decode the command line arguments on Mac OS X.
 
    Return a pointer to a newly allocated wide character string (use
-   PyMem_Free() to free the memory), or NULL on memory allocation error. */
+   PyMem_RawFree() to free the memory), or NULL on memory allocation error. */
 
 wchar_t*
 _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
@@ -4873,7 +4846,7 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
        character count */
     if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1))
         return NULL;
-    unicode = PyMem_Malloc((size + 1) * sizeof(wchar_t));
+    unicode = PyMem_RawMalloc((size + 1) * sizeof(wchar_t));
     if (!unicode)
         return NULL;
 
@@ -4996,17 +4969,11 @@ PyUnicode_DecodeUTF32Stateful(const char *s,
     const char *starts = s;
     Py_ssize_t startinpos;
     Py_ssize_t endinpos;
-    Py_ssize_t outpos;
-    PyObject *unicode;
+    _PyUnicodeWriter writer;
     const unsigned char *q, *e;
-    int bo = 0;       /* assume native ordering by default */
+    int le, bo = 0;       /* assume native ordering by default */
+    const char *encoding;
     const char *errmsg = "";
-    /* Offsets from q for retrieving bytes in the right order. */
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
-    int iorder[] = {0, 1, 2, 3};
-#else
-    int iorder[] = {3, 2, 1, 0};
-#endif
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
 
@@ -5020,107 +4987,118 @@ PyUnicode_DecodeUTF32Stateful(const char *s,
        byte order setting accordingly. In native mode, the leading BOM
        mark is skipped, in all other modes, it is copied to the output
        stream as-is (giving a ZWNBSP character). */
-    if (bo == 0) {
-        if (size >= 4) {
-            const Py_UCS4 bom = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) |
-                (q[iorder[1]] << 8) | q[iorder[0]];
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
-            if (bom == 0x0000FEFF) {
-                q += 4;
-                bo = -1;
-            }
-            else if (bom == 0xFFFE0000) {
-                q += 4;
-                bo = 1;
-            }
-#else
-            if (bom == 0x0000FEFF) {
-                q += 4;
-                bo = 1;
-            }
-            else if (bom == 0xFFFE0000) {
-                q += 4;
-                bo = -1;
-            }
-#endif
+    if (bo == 0 && size >= 4) {
+        Py_UCS4 bom = (q[3] << 24) | (q[2] << 16) | (q[1] << 8) | q[0];
+        if (bom == 0x0000FEFF) {
+            bo = -1;
+            q += 4;
         }
+        else if (bom == 0xFFFE0000) {
+            bo = 1;
+            q += 4;
+        }
+        if (byteorder)
+            *byteorder = bo;
     }
 
-    if (bo == -1) {
-        /* force LE */
-        iorder[0] = 0;
-        iorder[1] = 1;
-        iorder[2] = 2;
-        iorder[3] = 3;
-    }
-    else if (bo == 1) {
-        /* force BE */
-        iorder[0] = 3;
-        iorder[1] = 2;
-        iorder[2] = 1;
-        iorder[3] = 0;
+    if (q == e) {
+        if (consumed)
+            *consumed = size;
+        _Py_RETURN_UNICODE_EMPTY();
     }
 
-    /* This might be one to much, because of a BOM */
-    unicode = PyUnicode_New((size+3)/4, 127);
-    if (!unicode)
-        return NULL;
-    if (size == 0)
-        return unicode;
-    outpos = 0;
+#ifdef WORDS_BIGENDIAN
+    le = bo < 0;
+#else
+    le = bo <= 0;
+#endif
+    encoding = le ? "utf-32-le" : "utf-32-be";
 
-    while (q < e) {
-        Py_UCS4 ch;
-        /* remaining bytes at the end? (size should be divisible by 4) */
-        if (e-q<4) {
-            if (consumed)
+    _PyUnicodeWriter_Init(&writer);
+    writer.min_length = (e - q + 3) / 4;
+    if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1)
+        goto onError;
+
+    while (1) {
+        Py_UCS4 ch = 0;
+        Py_UCS4 maxch = PyUnicode_MAX_CHAR_VALUE(writer.buffer);
+
+        if (e - q >= 4) {
+            enum PyUnicode_Kind kind = writer.kind;
+            void *data = writer.data;
+            const unsigned char *last = e - 4;
+            Py_ssize_t pos = writer.pos;
+            if (le) {
+                do {
+                    ch = (q[3] << 24) | (q[2] << 16) | (q[1] << 8) | q[0];
+                    if (ch > maxch)
+                        break;
+                    if (kind != PyUnicode_1BYTE_KIND &&
+                        Py_UNICODE_IS_SURROGATE(ch))
+                        break;
+                    PyUnicode_WRITE(kind, data, pos++, ch);
+                    q += 4;
+                } while (q <= last);
+            }
+            else {
+                do {
+                    ch = (q[0] << 24) | (q[1] << 16) | (q[2] << 8) | q[3];
+                    if (ch > maxch)
+                        break;
+                    if (kind != PyUnicode_1BYTE_KIND &&
+                        Py_UNICODE_IS_SURROGATE(ch))
+                        break;
+                    PyUnicode_WRITE(kind, data, pos++, ch);
+                    q += 4;
+                } while (q <= last);
+            }
+            writer.pos = pos;
+        }
+
+        if (Py_UNICODE_IS_SURROGATE(ch)) {
+            errmsg = "code point in surrogate code point range(0xd800, 0xe000)";
+            startinpos = ((const char *)q) - starts;
+            endinpos = startinpos + 4;
+        }
+        else if (ch <= maxch) {
+            if (q == e || consumed)
                 break;
+            /* remaining bytes at the end? (size should be divisible by 4) */
             errmsg = "truncated data";
-            startinpos = ((const char *)q)-starts;
-            endinpos = ((const char *)e)-starts;
-            goto utf32Error;
-            /* The remaining input chars are ignored if the callback
-               chooses to skip the input */
+            startinpos = ((const char *)q) - starts;
+            endinpos = ((const char *)e) - starts;
         }
-        ch = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) |
-            (q[iorder[1]] << 8) | q[iorder[0]];
-
-        if (ch >= 0x110000)
-        {
-            errmsg = "codepoint not in range(0x110000)";
-            startinpos = ((const char *)q)-starts;
-            endinpos = startinpos+4;
-            goto utf32Error;
+        else {
+            if (ch < 0x110000) {
+                if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
+                    goto onError;
+                q += 4;
+                continue;
+            }
+            errmsg = "code point not in range(0x110000)";
+            startinpos = ((const char *)q) - starts;
+            endinpos = startinpos + 4;
         }
-        if (unicode_putchar(&unicode, &outpos, ch) < 0)
-            goto onError;
-        q += 4;
-        continue;
-      utf32Error:
-        if (unicode_decode_call_errorhandler(
+
+        /* The remaining input chars are ignored if the callback
+           chooses to skip the input */
+        if (unicode_decode_call_errorhandler_writer(
                 errors, &errorHandler,
-                "utf32", errmsg,
+                encoding, errmsg,
                 &starts, (const char **)&e, &startinpos, &endinpos, &exc, (const char **)&q,
-                &unicode, &outpos))
+                &writer))
             goto onError;
     }
 
-    if (byteorder)
-        *byteorder = bo;
-
     if (consumed)
         *consumed = (const char *)q-starts;
 
-    /* Adjust length */
-    if (unicode_resize(&unicode, outpos) < 0)
-        goto onError;
-
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    return unicode_result(unicode);
+    return _PyUnicodeWriter_Finish(&writer);
 
   onError:
-    Py_DECREF(unicode);
+    _PyUnicodeWriter_Dealloc(&writer);
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
     return NULL;
@@ -5138,11 +5116,15 @@ _PyUnicode_EncodeUTF32(PyObject *str,
     unsigned char *p;
     Py_ssize_t nsize, i;
     /* Offsets from p for storing byte pairs in the right order. */
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+#if PY_LITTLE_ENDIAN
     int iorder[] = {0, 1, 2, 3};
 #else
     int iorder[] = {3, 2, 1, 0};
 #endif
+    const char *encoding;
+    PyObject *errorHandler = NULL;
+    PyObject *exc = NULL;
+    PyObject *rep = NULL;
 
 #define STORECHAR(CH)                           \
     do {                                        \
@@ -5174,7 +5156,7 @@ _PyUnicode_EncodeUTF32(PyObject *str,
     if (byteorder == 0)
         STORECHAR(0xFEFF);
     if (len == 0)
-        goto done;
+        return v;
 
     if (byteorder == -1) {
         /* force LE */
@@ -5182,6 +5164,7 @@ _PyUnicode_EncodeUTF32(PyObject *str,
         iorder[1] = 1;
         iorder[2] = 2;
         iorder[3] = 3;
+        encoding = "utf-32-le";
     }
     else if (byteorder == 1) {
         /* force BE */
@@ -5189,13 +5172,103 @@ _PyUnicode_EncodeUTF32(PyObject *str,
         iorder[1] = 2;
         iorder[2] = 1;
         iorder[3] = 0;
+        encoding = "utf-32-be";
     }
+    else
+        encoding = "utf-32";
 
-    for (i = 0; i < len; i++)
-        STORECHAR(PyUnicode_READ(kind, data, i));
+    if (kind == PyUnicode_1BYTE_KIND) {
+        for (i = 0; i < len; i++)
+            STORECHAR(PyUnicode_READ(kind, data, i));
+        return v;
+    }
 
-  done:
+    for (i = 0; i < len;) {
+        Py_ssize_t repsize, moreunits;
+        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+        i++;
+        assert(ch <= MAX_UNICODE);
+        if (!Py_UNICODE_IS_SURROGATE(ch)) {
+            STORECHAR(ch);
+            continue;
+        }
+
+        rep = unicode_encode_call_errorhandler(
+                errors, &errorHandler,
+                encoding, "surrogates not allowed",
+                str, &exc, i-1, i, &i);
+
+        if (!rep)
+            goto error;
+
+        if (PyBytes_Check(rep)) {
+            repsize = PyBytes_GET_SIZE(rep);
+            if (repsize & 3) {
+                raise_encode_exception(&exc, encoding,
+                                       str, i - 1, i,
+                                       "surrogates not allowed");
+                goto error;
+            }
+            moreunits = repsize / 4;
+        }
+        else {
+            assert(PyUnicode_Check(rep));
+            if (PyUnicode_READY(rep) < 0)
+                goto error;
+            moreunits = repsize = PyUnicode_GET_LENGTH(rep);
+            if (!PyUnicode_IS_ASCII(rep)) {
+                raise_encode_exception(&exc, encoding,
+                                       str, i - 1, i,
+                                       "surrogates not allowed");
+                goto error;
+            }
+        }
+
+        /* four bytes are reserved for each surrogate */
+        if (moreunits > 1) {
+            Py_ssize_t outpos = p - (unsigned char*) PyBytes_AS_STRING(v);
+            Py_ssize_t morebytes = 4 * (moreunits - 1);
+            if (PyBytes_GET_SIZE(v) > PY_SSIZE_T_MAX - morebytes) {
+                /* integer overflow */
+                PyErr_NoMemory();
+                goto error;
+            }
+            if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + morebytes) < 0)
+                goto error;
+            p = (unsigned char*) PyBytes_AS_STRING(v) + outpos;
+        }
+
+        if (PyBytes_Check(rep)) {
+            Py_MEMCPY(p, PyBytes_AS_STRING(rep), repsize);
+            p += repsize;
+        } else /* rep is unicode */ {
+            const Py_UCS1 *repdata;
+            assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
+            repdata = PyUnicode_1BYTE_DATA(rep);
+            while (repsize--) {
+                Py_UCS4 ch = *repdata++;
+                STORECHAR(ch);
+            }
+        }
+
+        Py_CLEAR(rep);
+    }
+
+    /* Cut back to the size actually needed.  This is necessary, for example,
+       when encoding a string containing isolated surrogates with the
+       'ignore' error handler. */
+    nsize = p - (unsigned char*) PyBytes_AS_STRING(v);
+    if (nsize != PyBytes_GET_SIZE(v))
+      _PyBytes_Resize(&v, nsize);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
     return v;
+  error:
+    Py_XDECREF(rep);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
+    Py_XDECREF(v);
+    return NULL;
 #undef STORECHAR
 }
 
@@ -5241,14 +5314,14 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
     const char *starts = s;
     Py_ssize_t startinpos;
     Py_ssize_t endinpos;
-    Py_ssize_t outpos;
-    PyObject *unicode;
+    _PyUnicodeWriter writer;
     const unsigned char *q, *e;
     int bo = 0;       /* assume native ordering by default */
     int native_ordering;
     const char *errmsg = "";
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
+    const char *encoding;
 
     q = (unsigned char *)s;
     e = q + size;
@@ -5280,40 +5353,42 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
         _Py_RETURN_UNICODE_EMPTY();
     }
 
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+#if PY_LITTLE_ENDIAN
     native_ordering = bo <= 0;
+    encoding = bo <= 0 ? "utf-16-le" : "utf-16-be";
 #else
     native_ordering = bo >= 0;
+    encoding = bo >= 0 ? "utf-16-be" : "utf-16-le";
 #endif
 
     /* Note: size will always be longer than the resulting Unicode
        character count */
-    unicode = PyUnicode_New((e - q + 1) / 2, 127);
-    if (!unicode)
-        return NULL;
+    _PyUnicodeWriter_Init(&writer);
+    writer.min_length = (e - q + 1) / 2;
+    if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1)
+        goto onError;
 
-    outpos = 0;
     while (1) {
         Py_UCS4 ch = 0;
         if (e - q >= 2) {
-            int kind = PyUnicode_KIND(unicode);
+            int kind = writer.kind;
             if (kind == PyUnicode_1BYTE_KIND) {
-                if (PyUnicode_IS_ASCII(unicode))
+                if (PyUnicode_IS_ASCII(writer.buffer))
                     ch = asciilib_utf16_decode(&q, e,
-                            PyUnicode_1BYTE_DATA(unicode), &outpos,
+                            (Py_UCS1*)writer.data, &writer.pos,
                             native_ordering);
                 else
                     ch = ucs1lib_utf16_decode(&q, e,
-                            PyUnicode_1BYTE_DATA(unicode), &outpos,
+                            (Py_UCS1*)writer.data, &writer.pos,
                             native_ordering);
             } else if (kind == PyUnicode_2BYTE_KIND) {
                 ch = ucs2lib_utf16_decode(&q, e,
-                        PyUnicode_2BYTE_DATA(unicode), &outpos,
+                        (Py_UCS2*)writer.data, &writer.pos,
                         native_ordering);
             } else {
                 assert(kind == PyUnicode_4BYTE_KIND);
                 ch = ucs4lib_utf16_decode(&q, e,
-                        PyUnicode_4BYTE_DATA(unicode), &outpos,
+                        (Py_UCS4*)writer.data, &writer.pos,
                         native_ordering);
             }
         }
@@ -5349,23 +5424,22 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
             endinpos = startinpos + 2;
             break;
         default:
-            if (unicode_putchar(&unicode, &outpos, ch) < 0)
+            if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
                 goto onError;
             continue;
         }
 
-        if (unicode_decode_call_errorhandler(
+        if (unicode_decode_call_errorhandler_writer(
                 errors,
                 &errorHandler,
-                "utf16", errmsg,
+                encoding, errmsg,
                 &starts,
                 (const char **)&e,
                 &startinpos,
                 &endinpos,
                 &exc,
                 (const char **)&q,
-                &unicode,
-                &outpos))
+                &writer))
             goto onError;
     }
 
@@ -5373,16 +5447,12 @@ End:
     if (consumed)
         *consumed = (const char *)q-starts;
 
-    /* Adjust length */
-    if (unicode_resize(&unicode, outpos) < 0)
-        goto onError;
-
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    return unicode_result(unicode);
+    return _PyUnicodeWriter_Finish(&writer);
 
   onError:
-    Py_DECREF(unicode);
+    _PyUnicodeWriter_Dealloc(&writer);
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
     return NULL;
@@ -5398,13 +5468,17 @@ _PyUnicode_EncodeUTF16(PyObject *str,
     Py_ssize_t len;
     PyObject *v;
     unsigned short *out;
-    Py_ssize_t bytesize;
     Py_ssize_t pairs;
-#ifdef WORDS_BIGENDIAN
+#if PY_BIG_ENDIAN
     int native_ordering = byteorder >= 0;
 #else
     int native_ordering = byteorder <= 0;
 #endif
+    const char *encoding;
+    Py_ssize_t nsize, pos;
+    PyObject *errorHandler = NULL;
+    PyObject *exc = NULL;
+    PyObject *rep = NULL;
 
     if (!PyUnicode_Check(str)) {
         PyErr_BadArgument();
@@ -5426,8 +5500,8 @@ _PyUnicode_EncodeUTF16(PyObject *str,
     }
     if (len > PY_SSIZE_T_MAX / 2 - pairs - (byteorder == 0))
         return PyErr_NoMemory();
-    bytesize = (len + pairs + (byteorder == 0)) * 2;
-    v = PyBytes_FromStringAndSize(NULL, bytesize);
+    nsize = len + pairs + (byteorder == 0);
+    v = PyBytes_FromStringAndSize(NULL, nsize * 2);
     if (v == NULL)
         return NULL;
 
@@ -5439,25 +5513,107 @@ _PyUnicode_EncodeUTF16(PyObject *str,
     if (len == 0)
         goto done;
 
-    switch (kind) {
-    case PyUnicode_1BYTE_KIND: {
-        ucs1lib_utf16_encode(out, (const Py_UCS1 *)data, len, native_ordering);
-        break;
-    }
-    case PyUnicode_2BYTE_KIND: {
-        ucs2lib_utf16_encode(out, (const Py_UCS2 *)data, len, native_ordering);
-        break;
-    }
-    case PyUnicode_4BYTE_KIND: {
-        ucs4lib_utf16_encode(out, (const Py_UCS4 *)data, len, native_ordering);
-        break;
+    if (kind == PyUnicode_1BYTE_KIND) {
+        ucs1lib_utf16_encode((const Py_UCS1 *)data, len, &out, native_ordering);
+        goto done;
     }
-    default:
-        assert(0);
+
+    if (byteorder < 0)
+        encoding = "utf-16-le";
+    else if (byteorder > 0)
+        encoding = "utf-16-be";
+    else
+        encoding = "utf-16";
+
+    pos = 0;
+    while (pos < len) {
+        Py_ssize_t repsize, moreunits;
+
+        if (kind == PyUnicode_2BYTE_KIND) {
+            pos += ucs2lib_utf16_encode((const Py_UCS2 *)data + pos, len - pos,
+                                        &out, native_ordering);
+        }
+        else {
+            assert(kind == PyUnicode_4BYTE_KIND);
+            pos += ucs4lib_utf16_encode((const Py_UCS4 *)data + pos, len - pos,
+                                        &out, native_ordering);
+        }
+        if (pos == len)
+            break;
+
+        rep = unicode_encode_call_errorhandler(
+                errors, &errorHandler,
+                encoding, "surrogates not allowed",
+                str, &exc, pos, pos + 1, &pos);
+        if (!rep)
+            goto error;
+
+        if (PyBytes_Check(rep)) {
+            repsize = PyBytes_GET_SIZE(rep);
+            if (repsize & 1) {
+                raise_encode_exception(&exc, encoding,
+                                       str, pos - 1, pos,
+                                       "surrogates not allowed");
+                goto error;
+            }
+            moreunits = repsize / 2;
+        }
+        else {
+            assert(PyUnicode_Check(rep));
+            if (PyUnicode_READY(rep) < 0)
+                goto error;
+            moreunits = repsize = PyUnicode_GET_LENGTH(rep);
+            if (!PyUnicode_IS_ASCII(rep)) {
+                raise_encode_exception(&exc, encoding,
+                                       str, pos - 1, pos,
+                                       "surrogates not allowed");
+                goto error;
+            }
+        }
+
+        /* two bytes are reserved for each surrogate */
+        if (moreunits > 1) {
+            Py_ssize_t outpos = out - (unsigned short*) PyBytes_AS_STRING(v);
+            Py_ssize_t morebytes = 2 * (moreunits - 1);
+            if (PyBytes_GET_SIZE(v) > PY_SSIZE_T_MAX - morebytes) {
+                /* integer overflow */
+                PyErr_NoMemory();
+                goto error;
+            }
+            if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + morebytes) < 0)
+                goto error;
+            out = (unsigned short*) PyBytes_AS_STRING(v) + outpos;
+        }
+
+        if (PyBytes_Check(rep)) {
+            Py_MEMCPY(out, PyBytes_AS_STRING(rep), repsize);
+            out += moreunits;
+        } else /* rep is unicode */ {
+            assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
+            ucs1lib_utf16_encode(PyUnicode_1BYTE_DATA(rep), repsize,
+                                 &out, native_ordering);
+        }
+
+        Py_CLEAR(rep);
     }
 
+    /* Cut back to the size actually needed.  This is necessary, for example,
+       when encoding a string containing isolated surrogates with the
+       'ignore' error handler. */
+    nsize = (unsigned char*) out - (unsigned char*) PyBytes_AS_STRING(v);
+    if (nsize != PyBytes_GET_SIZE(v))
+      _PyBytes_Resize(&v, nsize);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
   done:
     return v;
+  error:
+    Py_XDECREF(rep);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
+    Py_XDECREF(v);
+    return NULL;
+#undef STORECHAR
 }
 
 PyObject *
@@ -5548,27 +5704,26 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
     const char *starts = s;
     Py_ssize_t startinpos;
     Py_ssize_t endinpos;
-    PyObject *v;
+    _PyUnicodeWriter writer;
     const char *end;
     char* message;
     Py_UCS4 chr = 0xffffffff; /* in case 'getcode' messes up */
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
     Py_ssize_t len;
-    Py_ssize_t i;
 
     len = length_of_escaped_ascii_string(s, size);
+    if (len == 0)
+        _Py_RETURN_UNICODE_EMPTY();
 
     /* After length_of_escaped_ascii_string() there are two alternatives,
        either the string is pure ASCII with named escapes like \n, etc.
        and we determined it's exact size (common case)
        or it contains \x, \u, ... escape sequences.  then we create a
        legacy wchar string and resize it at the end of this function. */
-    if (len >= 0) {
-        v = PyUnicode_New(len, 127);
-        if (!v)
-            goto onError;
-        assert(PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
+    _PyUnicodeWriter_Init(&writer);
+    if (len > 0) {
+        writer.min_length = len;
     }
     else {
         /* Escaped strings will always be longer than the resulting
@@ -5576,15 +5731,11 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
            length after conversion to the true value.
            (but if the error callback returns a long replacement string
            we'll have to allocate more space) */
-        v = PyUnicode_New(size, 127);
-        if (!v)
-            goto onError;
-        len = size;
+        writer.min_length = size;
     }
 
     if (size == 0)
-        return v;
-    i = 0;
+        return _PyUnicodeWriter_Finish(&writer);
     end = s + size;
 
     while (s < end) {
@@ -5592,13 +5743,11 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
         Py_UCS4 x;
         int digits;
 
-        /* The only case in which i == ascii_length is a backslash
-           followed by a newline. */
-        assert(i <= len);
-
         /* Non-escape characters are interpreted as Unicode ordinals */
         if (*s != '\\') {
-            if (unicode_putchar(&v, &i, (unsigned char) *s++) < 0)
+            x = (unsigned char)*s;
+            s++;
+            if (_PyUnicodeWriter_WriteCharInline(&writer, x) < 0)
                 goto onError;
             continue;
         }
@@ -5610,18 +5759,14 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
         if (s > end)
             c = '\0'; /* Invalid after \ */
 
-        /* The only case in which i == ascii_length is a backslash
-           followed by a newline. */
-        assert(i < len || (i == len && c == '\n'));
-
         switch (c) {
 
             /* \x escapes */
-#define WRITECHAR(ch)                                   \
-            do {                                        \
-                if (unicode_putchar(&v, &i, ch) < 0)    \
-                    goto onError;                       \
-            }while(0)
+#define WRITECHAR(ch)                                                      \
+            do {                                                           \
+                if (_PyUnicodeWriter_WriteCharInline(&writer, (ch)) < 0)    \
+                    goto onError;                                          \
+            } while(0)
 
         case '\n': break;
         case '\\': WRITECHAR('\\'); break;
@@ -5745,35 +5890,32 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
 
       error:
         endinpos = s-starts;
-        if (unicode_decode_call_errorhandler(
+        if (unicode_decode_call_errorhandler_writer(
                 errors, &errorHandler,
                 "unicodeescape", message,
                 &starts, &end, &startinpos, &endinpos, &exc, &s,
-                &v, &i))
+                &writer))
             goto onError;
-        len = PyUnicode_GET_LENGTH(v);
         continue;
     }
 #undef WRITECHAR
 
-    if (unicode_resize(&v, i) < 0)
-        goto onError;
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    return unicode_result(v);
+    return _PyUnicodeWriter_Finish(&writer);
 
   ucnhashError:
     PyErr_SetString(
         PyExc_UnicodeError,
         "\\N escapes not supported (can't load unicodedata module)"
         );
-    Py_XDECREF(v);
+    _PyUnicodeWriter_Dealloc(&writer);
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
     return NULL;
 
   onError:
-    Py_XDECREF(v);
+    _PyUnicodeWriter_Dealloc(&writer);
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
     return NULL;
@@ -5926,23 +6068,22 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
     const char *starts = s;
     Py_ssize_t startinpos;
     Py_ssize_t endinpos;
-    Py_ssize_t outpos;
-    PyObject *v;
+    _PyUnicodeWriter writer;
     const char *end;
     const char *bs;
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
 
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
+
     /* Escaped strings will always be longer than the resulting
        Unicode string, so we start with size here and then reduce the
        length after conversion to the true value. (But decoding error
        handler might have to resize the string) */
-    v = PyUnicode_New(size, 127);
-    if (v == NULL)
-        goto onError;
-    if (size == 0)
-        return v;
-    outpos = 0;
+    _PyUnicodeWriter_Init(&writer);
+    writer.min_length = size;
+
     end = s + size;
     while (s < end) {
         unsigned char c;
@@ -5952,7 +6093,8 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
 
         /* Non-escape characters are interpreted as Unicode ordinals */
         if (*s != '\\') {
-            if (unicode_putchar(&v, &outpos, (unsigned char)*s++) < 0)
+            x = (unsigned char)*s++;
+            if (_PyUnicodeWriter_WriteCharInline(&writer, x) < 0)
                 goto onError;
             continue;
         }
@@ -5964,7 +6106,8 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
         for (;s < end;) {
             if (*s != '\\')
                 break;
-            if (unicode_putchar(&v, &outpos, (unsigned char)*s++) < 0)
+            x = (unsigned char)*s++;
+            if (_PyUnicodeWriter_WriteCharInline(&writer, x) < 0)
                 goto onError;
         }
         if (((s - bs) & 1) == 0 ||
@@ -5972,7 +6115,7 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
             (*s != 'u' && *s != 'U')) {
             continue;
         }
-        outpos--;
+        writer.pos--;
         count = *s=='u' ? 4 : 8;
         s++;
 
@@ -5981,11 +6124,11 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
             c = (unsigned char)*s;
             if (!Py_ISXDIGIT(c)) {
                 endinpos = s-starts;
-                if (unicode_decode_call_errorhandler(
+                if (unicode_decode_call_errorhandler_writer(
                         errors, &errorHandler,
                         "rawunicodeescape", "truncated \\uXXXX",
                         &starts, &end, &startinpos, &endinpos, &exc, &s,
-                        &v, &outpos))
+                        &writer))
                     goto onError;
                 goto nextByte;
             }
@@ -5998,28 +6141,27 @@ PyUnicode_DecodeRawUnicodeEscape(const char *s,
                 x += 10 + c - 'A';
         }
         if (x <= MAX_UNICODE) {
-            if (unicode_putchar(&v, &outpos, x) < 0)
+            if (_PyUnicodeWriter_WriteCharInline(&writer, x) < 0)
                 goto onError;
-        } else {
+        }
+        else {
             endinpos = s-starts;
-            if (unicode_decode_call_errorhandler(
+            if (unicode_decode_call_errorhandler_writer(
                     errors, &errorHandler,
                     "rawunicodeescape", "\\Uxxxxxxxx out of range",
                     &starts, &end, &startinpos, &endinpos, &exc, &s,
-                    &v, &outpos))
+                    &writer))
                 goto onError;
         }
       nextByte:
         ;
     }
-    if (unicode_resize(&v, outpos) < 0)
-        goto onError;
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    return unicode_result(v);
+    return _PyUnicodeWriter_Finish(&writer);
 
   onError:
-    Py_XDECREF(v);
+    _PyUnicodeWriter_Dealloc(&writer);
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
     return NULL;
@@ -6119,8 +6261,7 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
     const char *starts = s;
     Py_ssize_t startinpos;
     Py_ssize_t endinpos;
-    Py_ssize_t outpos;
-    PyObject *v;
+    _PyUnicodeWriter writer;
     const char *end;
     const char *reason;
     PyObject *errorHandler = NULL;
@@ -6131,15 +6272,17 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
                      1))
         return NULL;
 
-    /* XXX overflow detection missing */
-    v = PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE, 127);
-    if (v == NULL)
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
+    _PyUnicodeWriter_Init(&writer);
+    /* size + (Py_UNICODE_SIZE - 1) below must not overflow Py_ssize_t */
+    if (size > PY_SSIZE_T_MAX - (Py_UNICODE_SIZE - 1)) {
+        PyErr_NoMemory();
         goto onError;
-    if (PyUnicode_GET_LENGTH(v) == 0)
-        return v;
-    outpos = 0;
-    end = s + size;
+    }
+    writer.min_length = (size + (Py_UNICODE_SIZE - 1)) / Py_UNICODE_SIZE;
 
+    end = s + size;
     while (s < end) {
         Py_UNICODE uch;
         Py_UCS4 ch;
@@ -6181,28 +6324,26 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
         }
 #endif
 
-        if (unicode_putchar(&v, &outpos, ch) < 0)
+        if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
             goto onError;
         continue;
 
   error:
         startinpos = s - starts;
-        if (unicode_decode_call_errorhandler(
+        if (unicode_decode_call_errorhandler_writer(
                 errors, &errorHandler,
                 "unicode_internal", reason,
                 &starts, &end, &startinpos, &endinpos, &exc, &s,
-                &v, &outpos))
+                &writer))
             goto onError;
     }
 
-    if (unicode_resize(&v, outpos) < 0)
-        goto onError;
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    return unicode_result(v);
+    return _PyUnicodeWriter_Finish(&writer);
 
   onError:
-    Py_XDECREF(v);
+    _PyUnicodeWriter_Dealloc(&writer);
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
     return NULL;
@@ -6586,7 +6727,7 @@ PyUnicode_DecodeASCII(const char *s,
                       const char *errors)
 {
     const char *starts = s;
-    PyObject *unicode;
+    _PyUnicodeWriter writer;
     int kind;
     void *data;
     Py_ssize_t startinpos;
@@ -6603,46 +6744,46 @@ PyUnicode_DecodeASCII(const char *s,
     if (size == 1 && (unsigned char)s[0] < 128)
         return get_latin1_char((unsigned char)s[0]);
 
-    unicode = PyUnicode_New(size, 127);
-    if (unicode == NULL)
-        goto onError;
+    _PyUnicodeWriter_Init(&writer);
+    writer.min_length = size;
+    if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) < 0)
+        return NULL;
 
     e = s + size;
-    data = PyUnicode_1BYTE_DATA(unicode);
+    data = writer.data;
     outpos = ascii_decode(s, e, (Py_UCS1 *)data);
-    if (outpos == size)
-        return unicode;
+    writer.pos = outpos;
+    if (writer.pos == size)
+        return _PyUnicodeWriter_Finish(&writer);
 
-    s += outpos;
-    kind = PyUnicode_1BYTE_KIND;
+    s += writer.pos;
+    kind = writer.kind;
     while (s < e) {
-        register unsigned char c = (unsigned char)*s;
+        unsigned char c = (unsigned char)*s;
         if (c < 128) {
-            PyUnicode_WRITE(kind, data, outpos++, c);
+            PyUnicode_WRITE(kind, data, writer.pos, c);
+            writer.pos++;
             ++s;
         }
         else {
             startinpos = s-starts;
             endinpos = startinpos + 1;
-            if (unicode_decode_call_errorhandler(
+            if (unicode_decode_call_errorhandler_writer(
                     errors, &errorHandler,
                     "ascii", "ordinal not in range(128)",
                     &starts, &e, &startinpos, &endinpos, &exc, &s,
-                    &unicode, &outpos))
+                    &writer))
                 goto onError;
-            kind = PyUnicode_KIND(unicode);
-            data = PyUnicode_DATA(unicode);
+            kind = writer.kind;
+            data = writer.data;
         }
     }
-    if (unicode_resize(&unicode, outpos) < 0)
-        goto onError;
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    assert(_PyUnicode_CheckConsistency(unicode, 1));
-    return unicode;
+    return _PyUnicodeWriter_Finish(&writer);
 
   onError:
-    Py_XDECREF(unicode);
+    _PyUnicodeWriter_Dealloc(&writer);
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
     return NULL;
@@ -6674,7 +6815,7 @@ _PyUnicode_AsASCIIString(PyObject *unicode, const char *errors)
         return NULL;
     /* Fast path: if it is an ASCII-only string, construct bytes object
        directly. Else defer to above function to raise the exception. */
-    if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128)
+    if (PyUnicode_IS_ASCII(unicode))
         return PyBytes_FromStringAndSize(PyUnicode_DATA(unicode),
                                          PyUnicode_GET_LENGTH(unicode));
     return unicode_encode_ucs1(unicode, errors, 128);
@@ -6752,8 +6893,8 @@ decode_code_page_flags(UINT code_page)
  * Decode a byte string from a Windows code page into unicode object in strict
  * mode.
  *
- * Returns consumed size if succeed, returns -2 on decode error, or raise a
- * WindowsError and returns -1 on other error.
+ * Returns the consumed size on success, returns -2 on a decode error, or
+ * raises an OSError and returns -1 on any other error.
  */
 static int
 decode_code_page_strict(UINT code_page,
@@ -6804,7 +6945,7 @@ error:
  * Decode a byte string from a code page into unicode object with an error
  * handler.
  *
- * Returns consumed size if succeed, or raise a WindowsError or
+ * Returns the consumed size on success, or raises an OSError or
  * UnicodeDecodeError exception and returns -1 on error.
  */
 static int
@@ -6823,7 +6964,8 @@ decode_code_page_errors(UINT code_page,
     /* each step cannot decode more than 1 character, but a character can be
        represented as a surrogate pair */
     wchar_t buffer[2], *startout, *out;
-    int insize, outsize;
+    int insize;
+    Py_ssize_t outsize;
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
     PyObject *encoding_obj = NULL;
@@ -6903,7 +7045,7 @@ decode_code_page_errors(UINT code_page,
             startinpos = in - startin;
             endinpos = startinpos + 1;
             outpos = out - PyUnicode_AS_UNICODE(*v);
-            if (unicode_decode_call_errorhandler(
+            if (unicode_decode_call_errorhandler_wchar(
                     errors, &errorHandler,
                     encoding, reason,
                     &startin, &endin, &startinpos, &endinpos, &exc, &in,
@@ -7059,7 +7201,7 @@ encode_code_page_flags(UINT code_page, const char *errors)
  * mode.
  *
  * Returns consumed characters if succeed, returns -2 on encode error, or raise
- * a WindowsError and returns -1 on other error.
+ * an OSError and returns -1 on other error.
  */
 static int
 encode_code_page_strict(UINT code_page, PyObject **outbytes,
@@ -7093,10 +7235,11 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
         Py_DECREF(substring);
         return -1;
     }
+    assert(size <= INT_MAX);
 
     /* First get the size of the result */
     outsize = WideCharToMultiByte(code_page, flags,
-                                  p, size,
+                                  p, (int)size,
                                   NULL, 0,
                                   NULL, pusedDefaultChar);
     if (outsize <= 0)
@@ -7133,7 +7276,7 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
 
     /* Do the conversion */
     outsize = WideCharToMultiByte(code_page, flags,
-                                  p, size,
+                                  p, (int)size,
                                   out, outsize,
                                   NULL, pusedDefaultChar);
     Py_CLEAR(substring);
@@ -7152,10 +7295,10 @@ error:
 }
 
 /*
- * Encode a Unicode string to a Windows code page into a byte string using a
+ * Encode a Unicode string to a Windows code page into a byte string using an
  * error handler.
  *
- * Returns consumed characters if succeed, or raise a WindowsError and returns
+ * Returns consumed characters on success, or raises an OSError and returns
  * -1 on other error.
  */
 static int
@@ -7241,9 +7384,8 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
             charsize = 1;
         }
         else {
-            ch -= 0x10000;
-            chars[0] = 0xd800 + (ch >> 10);
-            chars[1] = 0xdc00 + (ch & 0x3ff);
+            chars[0] = Py_UNICODE_HIGH_SURROGATE(ch);
+            chars[1] = Py_UNICODE_LOW_SURROGATE(ch);
             charsize = 2;
         }
 
@@ -7436,220 +7578,258 @@ PyUnicode_AsMBCSString(PyObject *unicode)
 
 /* --- Character Mapping Codec -------------------------------------------- */
 
-PyObject *
-PyUnicode_DecodeCharmap(const char *s,
-                        Py_ssize_t size,
-                        PyObject *mapping,
-                        const char *errors)
+static int
+charmap_decode_string(const char *s,
+                      Py_ssize_t size,
+                      PyObject *mapping,
+                      const char *errors,
+                      _PyUnicodeWriter *writer)
 {
     const char *starts = s;
-    Py_ssize_t startinpos;
-    Py_ssize_t endinpos;
-    Py_ssize_t outpos;
     const char *e;
-    PyObject *v;
-    Py_ssize_t extrachars = 0;
-    PyObject *errorHandler = NULL;
-    PyObject *exc = NULL;
+    Py_ssize_t startinpos, endinpos;
+    PyObject *errorHandler = NULL, *exc = NULL;
+    Py_ssize_t maplen;
+    enum PyUnicode_Kind mapkind;
+    void *mapdata;
+    Py_UCS4 x;
+    unsigned char ch;
+
+    if (PyUnicode_READY(mapping) == -1)
+        return -1;
 
-    /* Default to Latin-1 */
-    if (mapping == NULL)
-        return PyUnicode_DecodeLatin1(s, size, errors);
+    maplen = PyUnicode_GET_LENGTH(mapping);
+    mapdata = PyUnicode_DATA(mapping);
+    mapkind = PyUnicode_KIND(mapping);
 
-    v = PyUnicode_New(size, 127);
-    if (v == NULL)
-        goto onError;
-    if (size == 0)
-        return v;
-    outpos = 0;
     e = s + size;
-    if (PyUnicode_CheckExact(mapping)) {
-        Py_ssize_t maplen;
-        enum PyUnicode_Kind mapkind;
-        void *mapdata;
-        Py_UCS4 x;
 
-        if (PyUnicode_READY(mapping) == -1)
-            return NULL;
+    if (mapkind == PyUnicode_1BYTE_KIND && maplen >= 256) {
+        /* fast-path for cp037, cp500 and iso8859_1 encodings. iso8859_1
+         * is disabled in encoding aliases, latin1 is preferred because
+         * its implementation is faster. */
+        Py_UCS1 *mapdata_ucs1 = (Py_UCS1 *)mapdata;
+        Py_UCS1 *outdata = (Py_UCS1 *)writer->data;
+        Py_UCS4 maxchar = writer->maxchar;
 
-        maplen = PyUnicode_GET_LENGTH(mapping);
-        mapdata = PyUnicode_DATA(mapping);
-        mapkind = PyUnicode_KIND(mapping);
+        assert (writer->kind == PyUnicode_1BYTE_KIND);
         while (s < e) {
-            unsigned char ch;
-            if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) {
-                enum PyUnicode_Kind outkind = PyUnicode_KIND(v);
-                if (outkind == PyUnicode_1BYTE_KIND) {
-                    void *outdata = PyUnicode_DATA(v);
-                    Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE(v);
-                    while (s < e) {
-                        unsigned char ch = *s;
-                        x = PyUnicode_READ(PyUnicode_2BYTE_KIND, mapdata, ch);
-                        if (x > maxchar)
-                            goto Error;
-                        PyUnicode_WRITE(PyUnicode_1BYTE_KIND, outdata, outpos++, x);
-                        ++s;
-                    }
-                    break;
+            ch = *s;
+            x = mapdata_ucs1[ch];
+            if (x > maxchar) {
+                if (_PyUnicodeWriter_Prepare(writer, 1, 0xff) == -1)
+                    goto onError;
+                maxchar = writer->maxchar;
+                outdata = (Py_UCS1 *)writer->data;
+            }
+            outdata[writer->pos] = x;
+            writer->pos++;
+            ++s;
+        }
+        return 0;
+    }
+
+    while (s < e) {
+        if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) {
+            enum PyUnicode_Kind outkind = writer->kind;
+            Py_UCS2 *mapdata_ucs2 = (Py_UCS2 *)mapdata;
+            if (outkind == PyUnicode_1BYTE_KIND) {
+                Py_UCS1 *outdata = (Py_UCS1 *)writer->data;
+                Py_UCS4 maxchar = writer->maxchar;
+                while (s < e) {
+                    ch = *s;
+                    x = mapdata_ucs2[ch];
+                    if (x > maxchar)
+                        goto Error;
+                    outdata[writer->pos] = x;
+                    writer->pos++;
+                    ++s;
                 }
-                else if (outkind == PyUnicode_2BYTE_KIND) {
-                    void *outdata = PyUnicode_DATA(v);
-                    while (s < e) {
-                        unsigned char ch = *s;
-                        x = PyUnicode_READ(PyUnicode_2BYTE_KIND, mapdata, ch);
-                        if (x == 0xFFFE)
-                            goto Error;
-                        PyUnicode_WRITE(PyUnicode_2BYTE_KIND, outdata, outpos++, x);
-                        ++s;
-                    }
-                    break;
+                break;
+            }
+            else if (outkind == PyUnicode_2BYTE_KIND) {
+                Py_UCS2 *outdata = (Py_UCS2 *)writer->data;
+                while (s < e) {
+                    ch = *s;
+                    x = mapdata_ucs2[ch];
+                    if (x == 0xFFFE)
+                        goto Error;
+                    outdata[writer->pos] = x;
+                    writer->pos++;
+                    ++s;
                 }
+                break;
             }
-            ch = *s;
+        }
+        ch = *s;
 
-            if (ch < maplen)
-                x = PyUnicode_READ(mapkind, mapdata, ch);
-            else
-                x = 0xfffe; /* invalid value */
+        if (ch < maplen)
+            x = PyUnicode_READ(mapkind, mapdata, ch);
+        else
+            x = 0xfffe; /* invalid value */
 Error:
-            if (x == 0xfffe)
-            {
-                /* undefined mapping */
-                startinpos = s-starts;
-                endinpos = startinpos+1;
-                if (unicode_decode_call_errorhandler(
-                        errors, &errorHandler,
-                        "charmap", "character maps to <undefined>",
-                        &starts, &e, &startinpos, &endinpos, &exc, &s,
-                        &v, &outpos)) {
-                    goto onError;
-                }
-                continue;
+        if (x == 0xfffe)
+        {
+            /* undefined mapping */
+            startinpos = s-starts;
+            endinpos = startinpos+1;
+            if (unicode_decode_call_errorhandler_writer(
+                    errors, &errorHandler,
+                    "charmap", "character maps to <undefined>",
+                    &starts, &e, &startinpos, &endinpos, &exc, &s,
+                    writer)) {
+                goto onError;
             }
+            continue;
+        }
+
+        if (_PyUnicodeWriter_WriteCharInline(writer, x) < 0)
+            goto onError;
+        ++s;
+    }
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
+    return 0;
+
+onError:
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
+    return -1;
+}
+
+static int
+charmap_decode_mapping(const char *s,
+                       Py_ssize_t size,
+                       PyObject *mapping,
+                       const char *errors,
+                       _PyUnicodeWriter *writer)
+{
+    const char *starts = s;
+    const char *e;
+    Py_ssize_t startinpos, endinpos;
+    PyObject *errorHandler = NULL, *exc = NULL;
+    unsigned char ch;
+    PyObject *key, *item = NULL;
+
+    e = s + size;
+
+    while (s < e) {
+        ch = *s;
 
-            if (unicode_putchar(&v, &outpos, x) < 0)
+        /* Get mapping (char ordinal -> integer, Unicode char or None) */
+        key = PyLong_FromLong((long)ch);
+        if (key == NULL)
+            goto onError;
+
+        item = PyObject_GetItem(mapping, key);
+        Py_DECREF(key);
+        if (item == NULL) {
+            if (PyErr_ExceptionMatches(PyExc_LookupError)) {
+                /* No mapping found means: mapping is undefined. */
+                PyErr_Clear();
+                goto Undefined;
+            } else
                 goto onError;
-            ++s;
         }
-    }
-    else {
-        while (s < e) {
-            unsigned char ch = *s;
-            PyObject *w, *x;
 
-            /* Get mapping (char ordinal -> integer, Unicode char or None) */
-            w = PyLong_FromLong((long)ch);
-            if (w == NULL)
+        /* Apply mapping */
+        if (item == Py_None)
+            goto Undefined;
+        if (PyLong_Check(item)) {
+            long value = PyLong_AS_LONG(item);
+            if (value == 0xFFFE)
+                goto Undefined;
+            if (value < 0 || value > MAX_UNICODE) {
+                PyErr_Format(PyExc_TypeError,
+                             "character mapping must be in range(0x%lx)",
+                             (unsigned long)MAX_UNICODE + 1);
                 goto onError;
-            x = PyObject_GetItem(mapping, w);
-            Py_DECREF(w);
-            if (x == NULL) {
-                if (PyErr_ExceptionMatches(PyExc_LookupError)) {
-                    /* No mapping found means: mapping is undefined. */
-                    PyErr_Clear();
-                    goto Undefined;
-                } else
-                    goto onError;
             }
 
-            /* Apply mapping */
-            if (x == Py_None)
-                goto Undefined;
-            if (PyLong_Check(x)) {
-                long value = PyLong_AS_LONG(x);
+            if (_PyUnicodeWriter_WriteCharInline(writer, value) < 0)
+                goto onError;
+        }
+        else if (PyUnicode_Check(item)) {
+            if (PyUnicode_READY(item) == -1)
+                goto onError;
+            if (PyUnicode_GET_LENGTH(item) == 1) {
+                Py_UCS4 value = PyUnicode_READ_CHAR(item, 0);
                 if (value == 0xFFFE)
                     goto Undefined;
-                if (value < 0 || value > MAX_UNICODE) {
-                    PyErr_Format(PyExc_TypeError,
-                                 "character mapping must be in range(0x%lx)",
-                                 (unsigned long)MAX_UNICODE + 1);
-                    Py_DECREF(x);
-                    goto onError;
-                }
-                if (unicode_putchar(&v, &outpos, value) < 0) {
-                    Py_DECREF(x);
-                    goto onError;
-                }
-            }
-            else if (PyUnicode_Check(x)) {
-                Py_ssize_t targetsize;
-
-                if (PyUnicode_READY(x) == -1) {
-                    Py_DECREF(x);
+                if (_PyUnicodeWriter_WriteCharInline(writer, value) < 0)
                     goto onError;
-                }
-                targetsize = PyUnicode_GET_LENGTH(x);
-
-                if (targetsize == 1) {
-                    /* 1-1 mapping */
-                    Py_UCS4 value = PyUnicode_READ_CHAR(x, 0);
-                    if (value == 0xFFFE)
-                        goto Undefined;
-                    if (unicode_putchar(&v, &outpos, value) < 0) {
-                        Py_DECREF(x);
-                        goto onError;
-                    }
-                }
-                else if (targetsize > 1) {
-                    /* 1-n mapping */
-                    if (targetsize > extrachars) {
-                        /* resize first */
-                        Py_ssize_t needed = (targetsize - extrachars) + \
-                            (targetsize << 2);
-                        extrachars += needed;
-                        /* XXX overflow detection missing */
-                        if (unicode_resize(&v,
-                                           PyUnicode_GET_LENGTH(v) + needed) < 0)
-                        {
-                            Py_DECREF(x);
-                            goto onError;
-                        }
-                    }
-                    if (unicode_widen(&v, outpos,
-                                      PyUnicode_MAX_CHAR_VALUE(x)) < 0) {
-                        Py_DECREF(x);
-                        goto onError;
-                    }
-                    PyUnicode_CopyCharacters(v, outpos, x, 0, targetsize);
-                    outpos += targetsize;
-                    extrachars -= targetsize;
-                }
-                /* 1-0 mapping: skip the character */
             }
             else {
-                /* wrong return value */
-                PyErr_SetString(PyExc_TypeError,
-                                "character mapping must return integer, None or str");
-                Py_DECREF(x);
-                goto onError;
+                writer->overallocate = 1;
+                if (_PyUnicodeWriter_WriteStr(writer, item) == -1)
+                    goto onError;
             }
-            Py_DECREF(x);
-            ++s;
-            continue;
+        }
+        else {
+            /* wrong return value */
+            PyErr_SetString(PyExc_TypeError,
+                            "character mapping must return integer, None or str");
+            goto onError;
+        }
+        Py_CLEAR(item);
+        ++s;
+        continue;
+
 Undefined:
-            /* undefined mapping */
-            Py_XDECREF(x);
-            startinpos = s-starts;
-            endinpos = startinpos+1;
-            if (unicode_decode_call_errorhandler(
-                    errors, &errorHandler,
-                    "charmap", "character maps to <undefined>",
-                    &starts, &e, &startinpos, &endinpos, &exc, &s,
-                    &v, &outpos)) {
-                goto onError;
-            }
+        /* undefined mapping */
+        Py_CLEAR(item);
+        startinpos = s-starts;
+        endinpos = startinpos+1;
+        if (unicode_decode_call_errorhandler_writer(
+                errors, &errorHandler,
+                "charmap", "character maps to <undefined>",
+                &starts, &e, &startinpos, &endinpos, &exc, &s,
+                writer)) {
+            goto onError;
         }
     }
-    if (unicode_resize(&v, outpos) < 0)
-        goto onError;
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    return unicode_result(v);
+    return 0;
 
-  onError:
+onError:
+    Py_XDECREF(item);
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
-    Py_XDECREF(v);
+    return -1;
+}
+
+PyObject *
+PyUnicode_DecodeCharmap(const char *s,
+                        Py_ssize_t size,
+                        PyObject *mapping,
+                        const char *errors)
+{
+    _PyUnicodeWriter writer;
+
+    /* A NULL mapping means Latin-1: delegate to PyUnicode_DecodeLatin1() */
+    if (mapping == NULL)
+        return PyUnicode_DecodeLatin1(s, size, errors);
+
+    if (size == 0)
+        _Py_RETURN_UNICODE_EMPTY();
+    _PyUnicodeWriter_Init(&writer);
+    writer.min_length = size;  /* pre-size for one char per input byte */
+    if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1)
+        goto onError;
+
+    if (PyUnicode_CheckExact(mapping)) {  /* str used as lookup table */
+        if (charmap_decode_string(s, size, mapping, errors, &writer) < 0)
+            goto onError;
+    }
+    else {  /* other objects: PyObject_GetItem(mapping, byte) */
+        if (charmap_decode_mapping(s, size, mapping, errors, &writer) < 0)
+            goto onError;
+    }
+    return _PyUnicodeWriter_Finish(&writer);
+
+  onError:
+    _PyUnicodeWriter_Dealloc(&writer);
     return NULL;
 }
 
@@ -8163,10 +8343,14 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
      * -1=not initialized, 0=unknown, 1=strict, 2=replace,
      * 3=ignore, 4=xmlcharrefreplace */
     int known_errorHandler = -1;
+    void *data;
+    int kind;
 
     if (PyUnicode_READY(unicode) == -1)
         return NULL;
     size = PyUnicode_GET_LENGTH(unicode);
+    data = PyUnicode_DATA(unicode);
+    kind = PyUnicode_KIND(unicode);
 
     /* Default to Latin-1 */
     if (mapping == NULL)
@@ -8181,7 +8365,7 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
         return res;
 
     while (inpos<size) {
-        Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, inpos);
+        Py_UCS4 ch = PyUnicode_READ(kind, data, inpos);
         /* try to encode it */
         charmapencode_result x = charmapencode_output(ch, mapping, &res, &respos);
         if (x==enc_EXCEPTION) /* error */
@@ -8266,19 +8450,6 @@ make_translate_exception(PyObject **exceptionObject,
     }
 }
 
-/* raises a UnicodeTranslateError */
-static void
-raise_translate_exception(PyObject **exceptionObject,
-                          PyObject *unicode,
-                          Py_ssize_t startpos, Py_ssize_t endpos,
-                          const char *reason)
-{
-    make_translate_exception(exceptionObject,
-                             unicode, startpos, endpos, reason);
-    if (*exceptionObject != NULL)
-        PyCodec_StrictErrors(*exceptionObject);
-}
-
 /* error handling callback helper:
    build arguments, call the callback and check the arguments,
    put the result into newpos and return the replacement string, which
@@ -8554,8 +8725,10 @@ _PyUnicode_TranslateCharmap(PyObject *input,
             }
             switch (known_errorHandler) {
             case 1: /* strict */
-                raise_translate_exception(&exc, input, collstart,
-                                          collend, reason);
+                make_translate_exception(&exc,
+                                         input, collstart, collend, reason);
+                if (exc != NULL)
+                    PyCodec_StrictErrors(exc);
                 goto onError;
             case 2: /* replace */
                 /* No need to check for space, this is a 1:1 replacement */
@@ -9147,7 +9320,7 @@ tailmatch(PyObject *self,
 
     if (PyUnicode_READY(self) == -1 ||
         PyUnicode_READY(substring) == -1)
-        return 0;
+        return -1;
 
     if (PyUnicode_GET_LENGTH(substring) == 0)
         return 1;
@@ -9185,7 +9358,6 @@ tailmatch(PyObject *self,
             /* We do not need to compare 0 and len(substring)-1 because
                the if statement above ensured already that they are equal
                when we end up here. */
-            /* TODO: honor direction and do a forward or backwards search */
             for (i = 1; i < end_sub; ++i) {
                 if (PyUnicode_READ(kind_self, data_self, offset + i) !=
                     PyUnicode_READ(kind_sub, data_sub, i))
@@ -9309,7 +9481,7 @@ handle_capital_sigma(int kind, void *data, Py_ssize_t length, Py_ssize_t i)
 {
     Py_ssize_t j;
     int final_sigma;
-    Py_UCS4 c;
+    Py_UCS4 c = 0;
     /* U+03A3 is in the Final_Sigma context when, it is found like this:
 
      \p{cased}\p{case-ignorable}*U+03A3!(\p{case-ignorable}*\p{cased})
@@ -9651,41 +9823,49 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
             sep_data = PyUnicode_1BYTE_DATA(sep);
     }
 #endif
-    for (i = 0, res_offset = 0; i < seqlen; ++i) {
-        Py_ssize_t itemlen;
-        item = items[i];
-        /* Copy item, and maybe the separator. */
-        if (i && seplen != 0) {
-            if (use_memcpy) {
+    if (use_memcpy) {
+        for (i = 0; i < seqlen; ++i) {
+            Py_ssize_t itemlen;
+            item = items[i];
+
+            /* Copy item, and maybe the separator. */
+            if (i && seplen != 0) {
                 Py_MEMCPY(res_data,
                           sep_data,
                           kind * seplen);
                 res_data += kind * seplen;
             }
-            else {
-                _PyUnicode_FastCopyCharacters(res, res_offset, sep, 0, seplen);
-                res_offset += seplen;
-            }
-        }
-        itemlen = PyUnicode_GET_LENGTH(item);
-        if (itemlen != 0) {
-            if (use_memcpy) {
+
+            itemlen = PyUnicode_GET_LENGTH(item);
+            if (itemlen != 0) {
                 Py_MEMCPY(res_data,
                           PyUnicode_DATA(item),
                           kind * itemlen);
                 res_data += kind * itemlen;
             }
-            else {
+        }
+        assert(res_data == PyUnicode_1BYTE_DATA(res)
+                           + kind * PyUnicode_GET_LENGTH(res));
+    }
+    else {
+        for (i = 0, res_offset = 0; i < seqlen; ++i) {
+            Py_ssize_t itemlen;
+            item = items[i];
+
+            /* Copy item, and maybe the separator. */
+            if (i && seplen != 0) {
+                _PyUnicode_FastCopyCharacters(res, res_offset, sep, 0, seplen);
+                res_offset += seplen;
+            }
+
+            itemlen = PyUnicode_GET_LENGTH(item);
+            if (itemlen != 0) {
                 _PyUnicode_FastCopyCharacters(res, res_offset, item, 0, itemlen);
                 res_offset += itemlen;
             }
         }
-    }
-    if (use_memcpy)
-        assert(res_data == PyUnicode_1BYTE_DATA(res)
-                           + kind * PyUnicode_GET_LENGTH(res));
-    else
         assert(res_offset == PyUnicode_GET_LENGTH(res));
+    }
 
     Py_DECREF(fseq);
     Py_XDECREF(sep);
@@ -9718,8 +9898,8 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
             Py_UCS4 * to_ = (Py_UCS4 *)((data)) + (start); \
             for (; i_ < (length); ++i_, ++to_) *to_ = (value); \
             break; \
-        default: assert(0); \
         } \
+        default: assert(0); \
         } \
     } while (0)
 
@@ -10077,6 +10257,31 @@ anylib_count(int kind, PyObject *sstr, void* sbuf, Py_ssize_t slen,
     return 0;
 }
 
+/* Hand the range [pos, length) of u to the stringlib helper matching the
+   kind of u; presumably rewrites at most maxcount u1 -> u2 (confirm there). */
+static void
+replace_1char_inplace(PyObject *u, Py_ssize_t pos,
+                      Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)
+{
+    void *buf = PyUnicode_DATA(u);
+    Py_ssize_t n = PyUnicode_GET_LENGTH(u);
+    switch (PyUnicode_KIND(u)) {
+    case PyUnicode_1BYTE_KIND:
+        ucs1lib_replace_1char_inplace((Py_UCS1 *)buf + pos,
+                                      (Py_UCS1 *)buf + n, u1, u2, maxcount);
+        break;
+    case PyUnicode_2BYTE_KIND:
+        ucs2lib_replace_1char_inplace((Py_UCS2 *)buf + pos,
+                                      (Py_UCS2 *)buf + n, u1, u2, maxcount);
+        break;
+    default:
+        assert(PyUnicode_KIND(u) == PyUnicode_4BYTE_KIND);
+        ucs4lib_replace_1char_inplace((Py_UCS4 *)buf + pos,
+                                      (Py_UCS4 *)buf + n, u1, u2, maxcount);
+        break;
+    }
+}
+
 static PyObject *
 replace(PyObject *self, PyObject *str1,
         PyObject *str2, Py_ssize_t maxcount)
@@ -10093,7 +10298,7 @@ replace(PyObject *self, PyObject *str1,
     Py_ssize_t len1 = PyUnicode_GET_LENGTH(str1);
     Py_ssize_t len2 = PyUnicode_GET_LENGTH(str2);
     int mayshrink;
-    Py_UCS4 maxchar, maxchar_str2;
+    Py_UCS4 maxchar, maxchar_str1, maxchar_str2;
 
     if (maxcount < 0)
         maxcount = PY_SSIZE_T_MAX;
@@ -10102,15 +10307,16 @@ replace(PyObject *self, PyObject *str1,
 
     if (str1 == str2)
         goto nothing;
-    if (skind < kind1)
-        /* substring too wide to be present */
-        goto nothing;
 
     maxchar = PyUnicode_MAX_CHAR_VALUE(self);
+    maxchar_str1 = PyUnicode_MAX_CHAR_VALUE(str1);
+    if (maxchar < maxchar_str1)
+        /* substring too wide to be present */
+        goto nothing;
     maxchar_str2 = PyUnicode_MAX_CHAR_VALUE(str2);
     /* Replacing str1 with str2 may cause a maxchar reduction in the
        result string. */
-    mayshrink = (maxchar_str2 < maxchar);
+    mayshrink = (maxchar_str2 < maxchar_str1) && (maxchar == maxchar_str1);
     maxchar = Py_MAX(maxchar, maxchar_str2);
 
     if (len1 == len2) {
@@ -10120,35 +10326,19 @@ replace(PyObject *self, PyObject *str1,
         if (len1 == 1) {
             /* replace characters */
             Py_UCS4 u1, u2;
-            int rkind;
-            Py_ssize_t index, pos;
-            char *src;
+            Py_ssize_t pos;
 
-            u1 = PyUnicode_READ_CHAR(str1, 0);
-            pos = findchar(sbuf, PyUnicode_KIND(self), slen, u1, 1);
+            u1 = PyUnicode_READ(kind1, buf1, 0);
+            pos = findchar(sbuf, skind, slen, u1, 1);
             if (pos < 0)
                 goto nothing;
-            u2 = PyUnicode_READ_CHAR(str2, 0);
+            u2 = PyUnicode_READ(kind2, buf2, 0);
             u = PyUnicode_New(slen, maxchar);
             if (!u)
                 goto error;
-            _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen);
-            rkind = PyUnicode_KIND(u);
 
-            PyUnicode_WRITE(rkind, PyUnicode_DATA(u), pos, u2);
-            index = 0;
-            src = sbuf;
-            while (--maxcount)
-            {
-                pos++;
-                src += pos * PyUnicode_KIND(self);
-                slen -= pos;
-                index += pos;
-                pos = findchar(src, PyUnicode_KIND(self), slen, u1, 1);
-                if (pos < 0)
-                    break;
-                PyUnicode_WRITE(rkind, PyUnicode_DATA(u), index + pos, u2);
-            }
+            _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen);
+            replace_1char_inplace(u, pos, u1, u2, maxcount);
         }
         else {
             int rkind = skind;
@@ -10460,9 +10650,24 @@ unicode_center(PyObject *self, PyObject *args)
 static int
 unicode_compare(PyObject *str1, PyObject *str2)
 {
+#define COMPARE(TYPE1, TYPE2) \
+    do { \
+        TYPE1* p1 = (TYPE1 *)data1; \
+        TYPE2* p2 = (TYPE2 *)data2; \
+        TYPE1* end = p1 + len; \
+        Py_UCS4 c1, c2; \
+        for (; p1 != end; p1++, p2++) { \
+            c1 = *p1; \
+            c2 = *p2; \
+            if (c1 != c2) \
+                return (c1 < c2) ? -1 : 1; \
+        } \
+    } \
+    while (0)
+
     int kind1, kind2;
     void *data1, *data2;
-    Py_ssize_t len1, len2, i;
+    Py_ssize_t len1, len2, len;
 
     kind1 = PyUnicode_KIND(str1);
     kind2 = PyUnicode_KIND(str2);
@@ -10470,19 +10675,116 @@ unicode_compare(PyObject *str1, PyObject *str2)
     data2 = PyUnicode_DATA(str2);
     len1 = PyUnicode_GET_LENGTH(str1);
     len2 = PyUnicode_GET_LENGTH(str2);
+    len = Py_MIN(len1, len2);
 
-    for (i = 0; i < len1 && i < len2; ++i) {
-        Py_UCS4 c1, c2;
-        c1 = PyUnicode_READ(kind1, data1, i);
-        c2 = PyUnicode_READ(kind2, data2, i);
-
-        if (c1 != c2)
-            return (c1 < c2) ? -1 : 1;
+    switch(kind1) {
+    case PyUnicode_1BYTE_KIND:
+    {
+        switch(kind2) {
+        case PyUnicode_1BYTE_KIND:
+        {
+            int cmp = memcmp(data1, data2, len);
+            /* normalize result of memcmp() into the range [-1; 1] */
+            if (cmp < 0)
+                return -1;
+            if (cmp > 0)
+                return 1;
+            break;
+        }
+        case PyUnicode_2BYTE_KIND:
+            COMPARE(Py_UCS1, Py_UCS2);
+            break;
+        case PyUnicode_4BYTE_KIND:
+            COMPARE(Py_UCS1, Py_UCS4);
+            break;
+        default:
+            assert(0);
+        }
+        break;
     }
+    case PyUnicode_2BYTE_KIND:
+    {
+        switch(kind2) {
+        case PyUnicode_1BYTE_KIND:
+            COMPARE(Py_UCS2, Py_UCS1);
+            break;
+        case PyUnicode_2BYTE_KIND:
+        {
+            COMPARE(Py_UCS2, Py_UCS2);
+            break;
+        }
+        case PyUnicode_4BYTE_KIND:
+            COMPARE(Py_UCS2, Py_UCS4);
+            break;
+        default:
+            assert(0);
+        }
+        break;
+    }
+    case PyUnicode_4BYTE_KIND:
+    {
+        switch(kind2) {
+        case PyUnicode_1BYTE_KIND:
+            COMPARE(Py_UCS4, Py_UCS1);
+            break;
+        case PyUnicode_2BYTE_KIND:
+            COMPARE(Py_UCS4, Py_UCS2);
+            break;
+        case PyUnicode_4BYTE_KIND:
+        {
+#if defined(HAVE_WMEMCMP) && SIZEOF_WCHAR_T == 4
+            int cmp = wmemcmp((wchar_t *)data1, (wchar_t *)data2, len);
+            /* normalize result of wmemcmp() into the range [-1; 1] */
+            if (cmp < 0)
+                return -1;
+            if (cmp > 0)
+                return 1;
+#else
+            COMPARE(Py_UCS4, Py_UCS4);
+#endif
+            break;
+        }
+        default:
+            assert(0);
+        }
+        break;
+    }
+    default:
+        assert(0);
+    }
+
+    if (len1 == len2)
+        return 0;
+    if (len1 < len2)
+        return -1;
+    else
+        return 1;
+
+#undef COMPARE
+}
+
+/* Equality-only test for two strings: returns 1 when the length, the kind
+   and the raw character bytes all match, and 0 otherwise. */
+Py_LOCAL(int)
+unicode_compare_eq(PyObject *str1, PyObject *str2)
+{
+    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str1);
+    int width;
+
+    /* A length mismatch is reported as "not equal" straight away. */
+    if (nchars != PyUnicode_GET_LENGTH(str2))
+        return 0;
+    /* So is a kind mismatch. */
+    width = PyUnicode_KIND(str1);
+    if (width != PyUnicode_KIND(str2))
+        return 0;
 
-    return (len1 < len2) ? -1 : (len1 != len2);
+    /* Same length and kind: the raw data bytes decide. */
+    return memcmp(PyUnicode_DATA(str1), PyUnicode_DATA(str2),
+                  nchars * width) == 0;
 }
 
+
 int
 PyUnicode_Compare(PyObject *left, PyObject *right)
 {
@@ -10490,6 +10792,11 @@ PyUnicode_Compare(PyObject *left, PyObject *right)
         if (PyUnicode_READY(left) == -1 ||
             PyUnicode_READY(right) == -1)
             return -1;
+
+        /* a string is equal to itself */
+        if (left == right)
+            return 0;
+
         return unicode_compare(left, right);
     }
     PyErr_Format(PyExc_TypeError,
@@ -10500,29 +10807,59 @@ PyUnicode_Compare(PyObject *left, PyObject *right)
 }
 
 int
+_PyUnicode_CompareWithId(PyObject *left, _Py_Identifier *right)
+{
+    /* The string for the identifier is a borrowed reference, so there is
+       nothing to release here; a NULL result is reported as -1. */
+    PyObject *id_str = _PyUnicode_FromId(right);
+    return (id_str == NULL) ? -1 : PyUnicode_Compare(left, id_str);
+}
+
+/* Compare the string object uni with the NUL-terminated C string str.
+   Returns -1, 0 or 1 (less, equal, greater); a PyUnicode_READY() failure
+   also yields -1.  The bytes of str are compared as unsigned char. */
+int
 PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
 {
     Py_ssize_t i;
     int kind;
-    void *data;
     Py_UCS4 chr;
 
     assert(_PyUnicode_CHECK(uni));
     if (PyUnicode_READY(uni) == -1)
         return -1;
     kind = PyUnicode_KIND(uni);
-    data = PyUnicode_DATA(uni);
-    /* Compare Unicode string and source character set string */
-    for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++)
-        if (chr != str[i])
-            return (chr < (unsigned char)(str[i])) ? -1 : 1;
-    /* This check keeps Python strings that end in '\0' from comparing equal
-     to C strings identical up to that point. */
-    if (PyUnicode_GET_LENGTH(uni) != i || chr)
-        return 1; /* uni is longer */
-    if (str[i])
-        return -1; /* str is longer */
-    return 0;
+    if (kind == PyUnicode_1BYTE_KIND) {
+        const void *data = PyUnicode_1BYTE_DATA(uni);
+        size_t len1 = (size_t)PyUnicode_GET_LENGTH(uni);
+        size_t len, len2 = strlen(str);
+        int cmp;
+
+        len = Py_MIN(len1, len2);
+        cmp = memcmp(data, str, len);
+        if (cmp != 0)
+            return (cmp < 0) ? -1 : 1;
+        if (len1 > len2)
+            return 1; /* uni is longer */
+        if (len2 > len1)
+            return -1; /* str is longer */
+        return 0;
+    }
+    else {
+        void *data = PyUnicode_DATA(uni);
+        /* Compare Unicode string and source character set string.  Cast to
+           unsigned char so that bytes >= 0x80 do not sign-extend. */
+        for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++)
+            if (chr != (unsigned char)str[i])
+                return (chr < (unsigned char)(str[i])) ? -1 : 1;
+        /* This check keeps Python strings that end in '\0' from comparing equal
+         to C strings identical up to that point. */
+        if (PyUnicode_GET_LENGTH(uni) != i || chr)
+            return 1; /* uni is longer */
+        if (str[i])
+            return -1; /* str is longer */
+        return 0;
+    }
 }
 
 
@@ -10533,36 +10870,43 @@ PyObject *
 PyUnicode_RichCompare(PyObject *left, PyObject *right, int op)
 {
     int result;
+    PyObject *v;
 
-    if (PyUnicode_Check(left) && PyUnicode_Check(right)) {
-        PyObject *v;
-        if (PyUnicode_READY(left) == -1 ||
-            PyUnicode_READY(right) == -1)
-            return NULL;
-        if (PyUnicode_GET_LENGTH(left) != PyUnicode_GET_LENGTH(right) ||
-            PyUnicode_KIND(left) != PyUnicode_KIND(right)) {
-            if (op == Py_EQ) {
-                Py_INCREF(Py_False);
-                return Py_False;
-            }
-            if (op == Py_NE) {
-                Py_INCREF(Py_True);
-                return Py_True;
-            }
-        }
-        if (left == right)
-            result = 0;
-        else
-            result = unicode_compare(left, right);
+    if (!PyUnicode_Check(left) || !PyUnicode_Check(right))
+        Py_RETURN_NOTIMPLEMENTED;
 
-        /* Convert the return value to a Boolean */
+    if (PyUnicode_READY(left) == -1 ||
+        PyUnicode_READY(right) == -1)
+        return NULL;
+
+    if (left == right) {
         switch (op) {
         case Py_EQ:
-            v = TEST_COND(result == 0);
+        case Py_LE:
+        case Py_GE:
+            /* a string is equal to itself */
+            v = Py_True;
             break;
         case Py_NE:
-            v = TEST_COND(result != 0);
+        case Py_LT:
+        case Py_GT:
+            v = Py_False;
             break;
+        default:
+            PyErr_BadArgument();
+            return NULL;
+        }
+    }
+    else if (op == Py_EQ || op == Py_NE) {
+        result = unicode_compare_eq(left, right);
+        result ^= (op == Py_NE);
+        v = TEST_COND(result);
+    }
+    else {
+        result = unicode_compare(left, right);
+
+        /* Convert the return value to a Boolean */
+        switch (op) {
         case Py_LE:
             v = TEST_COND(result <= 0);
             break;
@@ -10579,18 +10923,16 @@ PyUnicode_RichCompare(PyObject *left, PyObject *right, int op)
             PyErr_BadArgument();
             return NULL;
         }
-        Py_INCREF(v);
-        return v;
     }
-
-    Py_RETURN_NOTIMPLEMENTED;
+    Py_INCREF(v);
+    return v;
 }
 
 int
 PyUnicode_Contains(PyObject *container, PyObject *element)
 {
     PyObject *str, *sub;
-    int kind1, kind2, kind;
+    int kind1, kind2;
     void *buf1, *buf2;
     Py_ssize_t len1, len2;
     int result;
@@ -10609,23 +10951,18 @@ PyUnicode_Contains(PyObject *container, PyObject *element)
         Py_DECREF(sub);
         return -1;
     }
-    if (PyUnicode_READY(sub) == -1 || PyUnicode_READY(str) == -1) {
-        Py_DECREF(sub);
-        Py_DECREF(str);
-    }
 
     kind1 = PyUnicode_KIND(str);
     kind2 = PyUnicode_KIND(sub);
-    kind = kind1;
     buf1 = PyUnicode_DATA(str);
     buf2 = PyUnicode_DATA(sub);
-    if (kind2 != kind) {
-        if (kind2 > kind) {
+    if (kind2 != kind1) {
+        if (kind2 > kind1) {
             Py_DECREF(sub);
             Py_DECREF(str);
             return 0;
         }
-        buf2 = _PyUnicode_AsKind(sub, kind);
+        buf2 = _PyUnicode_AsKind(sub, kind1);
     }
     if (!buf2) {
         Py_DECREF(sub);
@@ -10635,7 +10972,7 @@ PyUnicode_Contains(PyObject *container, PyObject *element)
     len1 = PyUnicode_GET_LENGTH(str);
     len2 = PyUnicode_GET_LENGTH(sub);
 
-    switch (kind) {
+    switch (kind1) {
     case PyUnicode_1BYTE_KIND:
         result = ucs1lib_find(buf1, len1, buf2, len2, 0) != -1;
         break;
@@ -10653,7 +10990,7 @@ PyUnicode_Contains(PyObject *container, PyObject *element)
     Py_DECREF(str);
     Py_DECREF(sub);
 
-    if (kind2 != kind)
+    if (kind2 != kind1)
         PyMem_Free(buf2);
 
     return result;
@@ -10729,7 +11066,8 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
         return;
     }
     left = *p_left;
-    if (right == NULL || left == NULL || !PyUnicode_Check(left)) {
+    if (right == NULL || left == NULL
+        || !PyUnicode_Check(left) || !PyUnicode_Check(right)) {
         if (!PyErr_Occurred())
             PyErr_BadInternalCall();
         goto error;
@@ -10769,15 +11107,9 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
         && !(PyUnicode_IS_ASCII(left) && !PyUnicode_IS_ASCII(right)))
     {
         /* append inplace */
-        if (unicode_resize(p_left, new_len) != 0) {
-            /* XXX if _PyUnicode_Resize() fails, 'left' has been
-             * deallocated so it cannot be put back into
-             * 'variable'.  The MemoryError is raised when there
-             * is no value in 'variable', which might (very
-             * remotely) be a cause of incompatibilities.
-             */
+        if (unicode_resize(p_left, new_len) != 0)
             goto error;
-        }
+
         /* copy 'right' into the newly allocated area of 'left' */
         _PyUnicode_FastCopyCharacters(*p_left, left_len, right, 0, right_len);
     }
@@ -10819,7 +11151,7 @@ interpreted as in slice notation.");
 static PyObject *
 unicode_count(PyObject *self, PyObject *args)
 {
-    PyObject *substring;
+    PyObject *substring = NULL;
     Py_ssize_t start = 0;
     Py_ssize_t end = PY_SSIZE_T_MAX;
     PyObject *result;
@@ -10833,8 +11165,10 @@ unicode_count(PyObject *self, PyObject *args)
 
     kind1 = PyUnicode_KIND(self);
     kind2 = PyUnicode_KIND(substring);
-    if (kind2 > kind1)
+    if (kind2 > kind1) {
+        Py_DECREF(substring);
         return PyLong_FromLong(0);
+    }
     kind = kind1;
     buf1 = PyUnicode_DATA(self);
     buf2 = PyUnicode_DATA(substring);
@@ -10905,23 +11239,25 @@ unicode_encode(PyObject *self, PyObject *args, PyObject *kwargs)
 }
 
 PyDoc_STRVAR(expandtabs__doc__,
-             "S.expandtabs([tabsize]) -> str\n\
+             "S.expandtabs(tabsize=8) -> str\n\
 \n\
 Return a copy of S where all tab characters are expanded using spaces.\n\
 If tabsize is not given, a tab size of 8 characters is assumed.");
 
 static PyObject*
-unicode_expandtabs(PyObject *self, PyObject *args)
+unicode_expandtabs(PyObject *self, PyObject *args, PyObject *kwds)
 {
     Py_ssize_t i, j, line_pos, src_len, incr;
     Py_UCS4 ch;
     PyObject *u;
     void *src_data, *dest_data;
+    static char *kwlist[] = {"tabsize", 0};
     int tabsize = 8;
     int kind;
     int found;
 
-    if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:expandtabs",
+                                     kwlist, &tabsize))
         return NULL;
 
     if (PyUnicode_READY(self) == -1)
@@ -11003,19 +11339,23 @@ Return -1 on failure.");
 static PyObject *
 unicode_find(PyObject *self, PyObject *args)
 {
-    PyObject *substring;
-    Py_ssize_t start;
-    Py_ssize_t end;
+    PyObject *substring = NULL;
+    Py_ssize_t start = 0;
+    Py_ssize_t end = 0;
     Py_ssize_t result;
 
     if (!stringlib_parse_args_finds_unicode("find", args, &substring,
                                             &start, &end))
         return NULL;
 
-    if (PyUnicode_READY(self) == -1)
+    if (PyUnicode_READY(self) == -1) {
+        Py_DECREF(substring);
         return NULL;
-    if (PyUnicode_READY(substring) == -1)
+    }
+    if (PyUnicode_READY(substring) == -1) {
+        Py_DECREF(substring);
         return NULL;
+    }
 
     result = any_find_slice(1, self, substring, start, end);
 
@@ -11033,7 +11373,6 @@ unicode_getitem(PyObject *self, Py_ssize_t index)
     void *data;
     enum PyUnicode_Kind kind;
     Py_UCS4 ch;
-    PyObject *res;
 
     if (!PyUnicode_Check(self) || PyUnicode_READY(self) == -1) {
         PyErr_BadArgument();
@@ -11046,17 +11385,7 @@ unicode_getitem(PyObject *self, Py_ssize_t index)
     kind = PyUnicode_KIND(self);
     data = PyUnicode_DATA(self);
     ch = PyUnicode_READ(kind, data, index);
-    if (ch < 256)
-        return get_latin1_char(ch);
-
-    res = PyUnicode_New(1, ch);
-    if (res == NULL)
-        return NULL;
-    kind = PyUnicode_KIND(res);
-    data = PyUnicode_DATA(res);
-    PyUnicode_WRITE(kind, data, 0, ch);
-    assert(_PyUnicode_CheckConsistency(res, 1));
-    return res;
+    return unicode_char(ch);
 }
 
 /* Believe it or not, this produces the same value for ASCII strings
@@ -11083,43 +11412,11 @@ unicode_hash(PyObject *self)
         _PyUnicode_HASH(self) = 0;
         return 0;
     }
-
-    /* The hash function as a macro, gets expanded three times below. */
-#define HASH(P)                                            \
-    x ^= (Py_uhash_t) *P << 7;                             \
-    while (--len >= 0)                                     \
-        x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *P++;  \
-
-    x = (Py_uhash_t) _Py_HashSecret.prefix;
-    switch (PyUnicode_KIND(self)) {
-    case PyUnicode_1BYTE_KIND: {
-        const unsigned char *c = PyUnicode_1BYTE_DATA(self);
-        HASH(c);
-        break;
-    }
-    case PyUnicode_2BYTE_KIND: {
-        const Py_UCS2 *s = PyUnicode_2BYTE_DATA(self);
-        HASH(s);
-        break;
-    }
-    default: {
-        Py_UCS4 *l;
-        assert(PyUnicode_KIND(self) == PyUnicode_4BYTE_KIND &&
-               "Impossible switch case in unicode_hash");
-        l = PyUnicode_4BYTE_DATA(self);
-        HASH(l);
-        break;
-    }
-    }
-    x ^= (Py_uhash_t) PyUnicode_GET_LENGTH(self);
-    x ^= (Py_uhash_t) _Py_HashSecret.suffix;
-
-    if (x == -1)
-        x = -2;
+    x = _Py_HashBytes(PyUnicode_DATA(self),
+                      PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self));
     _PyUnicode_HASH(self) = x;
     return x;
 }
-#undef HASH
 
 PyDoc_STRVAR(index__doc__,
              "S.index(sub[, start[, end]]) -> int\n\
@@ -11130,18 +11427,22 @@ static PyObject *
 unicode_index(PyObject *self, PyObject *args)
 {
     Py_ssize_t result;
-    PyObject *substring;
-    Py_ssize_t start;
-    Py_ssize_t end;
+    PyObject *substring = NULL;
+    Py_ssize_t start = 0;
+    Py_ssize_t end = 0;
 
     if (!stringlib_parse_args_finds_unicode("index", args, &substring,
                                             &start, &end))
         return NULL;
 
-    if (PyUnicode_READY(self) == -1)
+    if (PyUnicode_READY(self) == -1) {
+        Py_DECREF(substring);
         return NULL;
-    if (PyUnicode_READY(substring) == -1)
+    }
+    if (PyUnicode_READY(substring) == -1) {
+        Py_DECREF(substring);
         return NULL;
+    }
 
     result = any_find_slice(1, self, substring, start, end);
 
@@ -11671,6 +11972,7 @@ _PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj)
     int kind;
     Py_ssize_t i, j, len;
     BLOOM_MASK sepmask;
+    Py_ssize_t seplen;
 
     if (PyUnicode_READY(self) == -1 || PyUnicode_READY(sepobj) == -1)
         return NULL;
@@ -11678,24 +11980,35 @@ _PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj)
     kind = PyUnicode_KIND(self);
     data = PyUnicode_DATA(self);
     len = PyUnicode_GET_LENGTH(self);
+    seplen = PyUnicode_GET_LENGTH(sepobj);
     sepmask = make_bloom_mask(PyUnicode_KIND(sepobj),
                               PyUnicode_DATA(sepobj),
-                              PyUnicode_GET_LENGTH(sepobj));
+                              seplen);
 
     i = 0;
     if (striptype != RIGHTSTRIP) {
-        while (i < len &&
-               BLOOM_MEMBER(sepmask, PyUnicode_READ(kind, data, i), sepobj)) {
+        while (i < len) {
+            Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+            if (!BLOOM(sepmask, ch))
+                break;
+            if (PyUnicode_FindChar(sepobj, ch, 0, seplen, 1) < 0)
+                break;
             i++;
         }
     }
 
     j = len;
     if (striptype != LEFTSTRIP) {
-        do {
+        j--;
+        while (j >= i) {
+            Py_UCS4 ch = PyUnicode_READ(kind, data, j);
+            if (!BLOOM(sepmask, ch))
+                break;
+            if (PyUnicode_FindChar(sepobj, ch, 0, seplen, 1) < 0)
+                break;
             j--;
-        } while (j >= i &&
-                 BLOOM_MEMBER(sepmask, PyUnicode_READ(kind, data, j), sepobj));
+        }
+
         j++;
     }
 
@@ -11742,30 +12055,63 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
 static PyObject *
 do_strip(PyObject *self, int striptype)
 {
-    int kind;
-    void *data;
     Py_ssize_t len, i, j;
 
     if (PyUnicode_READY(self) == -1)
         return NULL;
 
-    kind = PyUnicode_KIND(self);
-    data = PyUnicode_DATA(self);
     len = PyUnicode_GET_LENGTH(self);
 
-    i = 0;
-    if (striptype != RIGHTSTRIP) {
-        while (i < len && Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, i))) {
-            i++;
+    if (PyUnicode_IS_ASCII(self)) {
+        Py_UCS1 *data = PyUnicode_1BYTE_DATA(self);
+
+        i = 0;
+        if (striptype != RIGHTSTRIP) {
+            while (i < len) {
+                Py_UCS1 ch = data[i];
+                if (!_Py_ascii_whitespace[ch])
+                    break;
+                i++;
+            }
+        }
+
+        j = len;
+        if (striptype != LEFTSTRIP) {
+            j--;
+            while (j >= i) {
+                Py_UCS1 ch = data[j];
+                if (!_Py_ascii_whitespace[ch])
+                    break;
+                j--;
+            }
+            j++;
         }
     }
+    else {
+        int kind = PyUnicode_KIND(self);
+        void *data = PyUnicode_DATA(self);
 
-    j = len;
-    if (striptype != LEFTSTRIP) {
-        do {
+        i = 0;
+        if (striptype != RIGHTSTRIP) {
+            while (i < len) {
+                Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+                if (!Py_UNICODE_ISSPACE(ch))
+                    break;
+                i++;
+            }
+        }
+
+        j = len;
+        if (striptype != LEFTSTRIP) {
             j--;
-        } while (j >= i && Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, j)));
-        j++;
+            while (j >= i) {
+                Py_UCS4 ch = PyUnicode_READ(kind, data, j);
+                if (!Py_UNICODE_ISSPACE(ch))
+                    break;
+                j--;
+            }
+            j++;
+        }
     }
 
     return PyUnicode_Substring(self, i, j);
@@ -11777,7 +12123,7 @@ do_argstrip(PyObject *self, int striptype, PyObject *args)
 {
     PyObject *sep = NULL;
 
-    if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
+    if (!PyArg_ParseTuple(args, stripformat[striptype], &sep))
         return NULL;
 
     if (sep != NULL && sep != Py_None) {
@@ -11989,7 +12335,7 @@ unicode_repr(PyObject *unicode)
     Py_ssize_t isize;
     Py_ssize_t osize, squote, dquote, i, o;
     Py_UCS4 max, quote;
-    int ikind, okind;
+    int ikind, okind, unchanged;
     void *idata, *odata;
 
     if (PyUnicode_READY(unicode) == -1)
@@ -12000,7 +12346,7 @@ unicode_repr(PyObject *unicode)
 
     /* Compute length of output, quote characters, and
        maximum character */
-    osize = 2; /* quotes */
+    osize = 0;
     max = 127;
     squote = dquote = 0;
     ikind = PyUnicode_KIND(unicode);
@@ -12037,7 +12383,9 @@ unicode_repr(PyObject *unicode)
     }
 
     quote = '\'';
+    unchanged = (osize == isize);
     if (squote) {
+        unchanged = 0;
         if (dquote)
             /* Both squote and dquote present. Use squote,
                and escape them */
@@ -12045,6 +12393,7 @@ unicode_repr(PyObject *unicode)
         else
             quote = '"';
     }
+    osize += 2;   /* quotes */
 
     repr = PyUnicode_New(osize, max);
     if (repr == NULL)
@@ -12054,82 +12403,88 @@ unicode_repr(PyObject *unicode)
 
     PyUnicode_WRITE(okind, odata, 0, quote);
     PyUnicode_WRITE(okind, odata, osize-1, quote);
+    if (unchanged) {
+        _PyUnicode_FastCopyCharacters(repr, 1,
+                                      unicode, 0,
+                                      isize);
+    }
+    else {
+        for (i = 0, o = 1; i < isize; i++) {
+            Py_UCS4 ch = PyUnicode_READ(ikind, idata, i);
 
-    for (i = 0, o = 1; i < isize; i++) {
-        Py_UCS4 ch = PyUnicode_READ(ikind, idata, i);
-
-        /* Escape quotes and backslashes */
-        if ((ch == quote) || (ch == '\\')) {
-            PyUnicode_WRITE(okind, odata, o++, '\\');
-            PyUnicode_WRITE(okind, odata, o++, ch);
-            continue;
-        }
+            /* Escape quotes and backslashes */
+            if ((ch == quote) || (ch == '\\')) {
+                PyUnicode_WRITE(okind, odata, o++, '\\');
+                PyUnicode_WRITE(okind, odata, o++, ch);
+                continue;
+            }
 
-        /* Map special whitespace to '\t', \n', '\r' */
-        if (ch == '\t') {
-            PyUnicode_WRITE(okind, odata, o++, '\\');
-            PyUnicode_WRITE(okind, odata, o++, 't');
-        }
-        else if (ch == '\n') {
-            PyUnicode_WRITE(okind, odata, o++, '\\');
-            PyUnicode_WRITE(okind, odata, o++, 'n');
-        }
-        else if (ch == '\r') {
-            PyUnicode_WRITE(okind, odata, o++, '\\');
-            PyUnicode_WRITE(okind, odata, o++, 'r');
-        }
+            /* Map special whitespace to '\t', '\n', '\r' */
+            if (ch == '\t') {
+                PyUnicode_WRITE(okind, odata, o++, '\\');
+                PyUnicode_WRITE(okind, odata, o++, 't');
+            }
+            else if (ch == '\n') {
+                PyUnicode_WRITE(okind, odata, o++, '\\');
+                PyUnicode_WRITE(okind, odata, o++, 'n');
+            }
+            else if (ch == '\r') {
+                PyUnicode_WRITE(okind, odata, o++, '\\');
+                PyUnicode_WRITE(okind, odata, o++, 'r');
+            }
 
-        /* Map non-printable US ASCII to '\xhh' */
-        else if (ch < ' ' || ch == 0x7F) {
-            PyUnicode_WRITE(okind, odata, o++, '\\');
-            PyUnicode_WRITE(okind, odata, o++, 'x');
-            PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
-            PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
-        }
+            /* Map non-printable US ASCII to '\xhh' */
+            else if (ch < ' ' || ch == 0x7F) {
+                PyUnicode_WRITE(okind, odata, o++, '\\');
+                PyUnicode_WRITE(okind, odata, o++, 'x');
+                PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
+                PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
+            }
 
-        /* Copy ASCII characters as-is */
-        else if (ch < 0x7F) {
-            PyUnicode_WRITE(okind, odata, o++, ch);
-        }
+            /* Copy ASCII characters as-is */
+            else if (ch < 0x7F) {
+                PyUnicode_WRITE(okind, odata, o++, ch);
+            }
 
-        /* Non-ASCII characters */
-        else {
-            /* Map Unicode whitespace and control characters
-               (categories Z* and C* except ASCII space)
-            */
-            if (!Py_UNICODE_ISPRINTABLE(ch)) {
-                PyUnicode_WRITE(okind, odata, o++, '\\');
-                /* Map 8-bit characters to '\xhh' */
-                if (ch <= 0xff) {
-                    PyUnicode_WRITE(okind, odata, o++, 'x');
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
-                }
-                /* Map 16-bit characters to '\uxxxx' */
-                else if (ch <= 0xffff) {
-                    PyUnicode_WRITE(okind, odata, o++, 'u');
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
+            /* Non-ASCII characters */
+            else {
+                /* Map Unicode whitespace and control characters
+                   (categories Z* and C* except ASCII space)
+                */
+                if (!Py_UNICODE_ISPRINTABLE(ch)) {
+                    PyUnicode_WRITE(okind, odata, o++, '\\');
+                    /* Map 8-bit characters to '\xhh' */
+                    if (ch <= 0xff) {
+                        PyUnicode_WRITE(okind, odata, o++, 'x');
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
+                    }
+                    /* Map 16-bit characters to '\uxxxx' */
+                    else if (ch <= 0xffff) {
+                        PyUnicode_WRITE(okind, odata, o++, 'u');
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
+                    }
+                    /* Map 21-bit characters to '\U00xxxxxx' */
+                    else {
+                        PyUnicode_WRITE(okind, odata, o++, 'U');
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 28) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 24) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 20) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 16) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
+                    }
                 }
-                /* Map 21-bit characters to '\U00xxxxxx' */
+                /* Copy characters as-is */
                 else {
-                    PyUnicode_WRITE(okind, odata, o++, 'U');
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 28) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 24) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 20) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 16) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
+                    PyUnicode_WRITE(okind, odata, o++, ch);
                 }
             }
-            /* Copy characters as-is */
-            else {
-                PyUnicode_WRITE(okind, odata, o++, ch);
-            }
         }
     }
     /* Closing quote already added at the beginning */
@@ -12149,19 +12504,23 @@ Return -1 on failure.");
 static PyObject *
 unicode_rfind(PyObject *self, PyObject *args)
 {
-    PyObject *substring;
-    Py_ssize_t start;
-    Py_ssize_t end;
+    PyObject *substring = NULL;
+    Py_ssize_t start = 0;
+    Py_ssize_t end = 0;
     Py_ssize_t result;
 
     if (!stringlib_parse_args_finds_unicode("rfind", args, &substring,
                                             &start, &end))
         return NULL;
 
-    if (PyUnicode_READY(self) == -1)
+    if (PyUnicode_READY(self) == -1) {
+        Py_DECREF(substring);
         return NULL;
-    if (PyUnicode_READY(substring) == -1)
+    }
+    if (PyUnicode_READY(substring) == -1) {
+        Py_DECREF(substring);
         return NULL;
+    }
 
     result = any_find_slice(-1, self, substring, start, end);
 
@@ -12181,19 +12540,23 @@ Like S.rfind() but raise ValueError when the substring is not found.");
 static PyObject *
 unicode_rindex(PyObject *self, PyObject *args)
 {
-    PyObject *substring;
-    Py_ssize_t start;
-    Py_ssize_t end;
+    PyObject *substring = NULL;
+    Py_ssize_t start = 0;
+    Py_ssize_t end = 0;
     Py_ssize_t result;
 
     if (!stringlib_parse_args_finds_unicode("rindex", args, &substring,
                                             &start, &end))
         return NULL;
 
-    if (PyUnicode_READY(self) == -1)
+    if (PyUnicode_READY(self) == -1) {
+        Py_DECREF(substring);
         return NULL;
-    if (PyUnicode_READY(substring) == -1)
+    }
+    if (PyUnicode_READY(substring) == -1) {
+        Py_DECREF(substring);
         return NULL;
+    }
 
     result = any_find_slice(-1, self, substring, start, end);
 
@@ -12325,7 +12688,7 @@ PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
     len1 = PyUnicode_GET_LENGTH(str_obj);
     len2 = PyUnicode_GET_LENGTH(sep_obj);
 
-    switch (PyUnicode_KIND(str_obj)) {
+    switch (kind) {
     case PyUnicode_1BYTE_KIND:
         if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj))
             out = asciilib_partition(str_obj, buf1, len1, sep_obj, buf2, len2);
@@ -12381,12 +12744,12 @@ PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in)
         return NULL;
     }
 
-    kind1 = PyUnicode_KIND(str_in);
+    kind1 = PyUnicode_KIND(str_obj);
     kind2 = PyUnicode_KIND(sep_obj);
     kind = Py_MAX(kind1, kind2);
-    buf1 = PyUnicode_DATA(str_in);
+    buf1 = PyUnicode_DATA(str_obj);
     if (kind1 != kind)
-        buf1 = _PyUnicode_AsKind(str_in, kind);
+        buf1 = _PyUnicode_AsKind(str_obj, kind);
     if (!buf1)
         goto onError;
     buf2 = PyUnicode_DATA(sep_obj);
@@ -12397,7 +12760,7 @@ PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in)
     len1 = PyUnicode_GET_LENGTH(str_obj);
     len2 = PyUnicode_GET_LENGTH(sep_obj);
 
-    switch (PyUnicode_KIND(str_in)) {
+    switch (kind) {
     case PyUnicode_1BYTE_KIND:
         if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj))
             out = asciilib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2);
@@ -12550,28 +12913,76 @@ unicode_swapcase(PyObject *self)
     return case_operation(self, do_swapcase);
 }
 
-PyDoc_STRVAR(maketrans__doc__,
-             "str.maketrans(x[, y[, z]]) -> dict (static method)\n\
-\n\
-Return a translation table usable for str.translate().\n\
-If there is only one argument, it must be a dictionary mapping Unicode\n\
-ordinals (integers) or characters to Unicode ordinals, strings or None.\n\
-Character keys will be then converted to ordinals.\n\
-If there are two arguments, they must be strings of equal length, and\n\
-in the resulting dictionary, each character in x will be mapped to the\n\
-character at the same position in y. If there is a third argument, it\n\
-must be a string, whose characters will be mapped to None in the result.");
+/*[clinic input]
 
-static PyObject*
-unicode_maketrans(PyObject *null, PyObject *args)
+@staticmethod
+str.maketrans as unicode_maketrans
+
+  x: object
+
+  y: unicode=NULL
+
+  z: unicode=NULL
+
+  /
+
+Return a translation table usable for str.translate().
+
+If there is only one argument, it must be a dictionary mapping Unicode
+ordinals (integers) or characters to Unicode ordinals, strings or None.
+Character keys will be then converted to ordinals.
+If there are two arguments, they must be strings of equal length, and
+in the resulting dictionary, each character in x will be mapped to the
+character at the same position in y. If there is a third argument, it
+must be a string, whose characters will be mapped to None in the result.
+[clinic start generated code]*/
+
+PyDoc_STRVAR(unicode_maketrans__doc__,
+"maketrans(x, y=None, z=None, /)\n"
+"--\n"
+"\n"
+"Return a translation table usable for str.translate().\n"
+"\n"
+"If there is only one argument, it must be a dictionary mapping Unicode\n"
+"ordinals (integers) or characters to Unicode ordinals, strings or None.\n"
+"Character keys will be then converted to ordinals.\n"
+"If there are two arguments, they must be strings of equal length, and\n"
+"in the resulting dictionary, each character in x will be mapped to the\n"
+"character at the same position in y. If there is a third argument, it\n"
+"must be a string, whose characters will be mapped to None in the result.");
+
+#define UNICODE_MAKETRANS_METHODDEF    \
+    {"maketrans", (PyCFunction)unicode_maketrans, METH_VARARGS|METH_STATIC, unicode_maketrans__doc__},
+
+static PyObject *
+unicode_maketrans_impl(PyObject *x, PyObject *y, PyObject *z);
+
+static PyObject *
+unicode_maketrans(void *null, PyObject *args)
+{
+    PyObject *return_value = NULL;
+    PyObject *x;
+    PyObject *y = NULL;
+    PyObject *z = NULL;
+
+    if (!PyArg_ParseTuple(args,
+        "O|UU:maketrans",
+        &x, &y, &z))
+        goto exit;
+    return_value = unicode_maketrans_impl(x, y, z);
+
+exit:
+    return return_value;
+}
+
+static PyObject *
+unicode_maketrans_impl(PyObject *x, PyObject *y, PyObject *z)
+/*[clinic end generated code: output=566edf630f77436a input=7bfbf529a293c6c5]*/
 {
-    PyObject *x, *y = NULL, *z = NULL;
     PyObject *new = NULL, *key, *value;
     Py_ssize_t i = 0;
     int res;
 
-    if (!PyArg_ParseTuple(args, "O|UU:maketrans", &x, &y, &z))
-        return NULL;
     new = PyDict_New();
     if (!new)
         return NULL;
@@ -12673,11 +13084,12 @@ unicode_maketrans(PyObject *null, PyObject *args)
 PyDoc_STRVAR(translate__doc__,
              "S.translate(table) -> str\n\
 \n\
-Return a copy of the string S, where all characters have been mapped\n\
-through the given translation table, which must be a mapping of\n\
-Unicode ordinals to Unicode ordinals, strings, or None.\n\
-Unmapped characters are left untouched. Characters mapped to None\n\
-are deleted.");
+Return a copy of the string S in which each character has been mapped\n\
+through the given translation table. The table must implement\n\
+lookup/indexing via __getitem__, for instance a dictionary or list,\n\
+mapping Unicode ordinals to Unicode ordinals, strings, or None. If\n\
+this operation raises LookupError, the character is left untouched.\n\
+Characters mapped to None are deleted.");
 
 static PyObject*
 unicode_translate(PyObject *self, PyObject *table)
@@ -12782,6 +13194,8 @@ unicode_startswith(PyObject *self,
                 return NULL;
             result = tailmatch(self, substring, start, end, -1);
             Py_DECREF(substring);
+            if (result == -1)
+                return NULL;
             if (result) {
                 Py_RETURN_TRUE;
             }
@@ -12798,6 +13212,8 @@ unicode_startswith(PyObject *self,
     }
     result = tailmatch(self, substring, start, end, -1);
     Py_DECREF(substring);
+    if (result == -1)
+        return NULL;
     return PyBool_FromLong(result);
 }
 
@@ -12831,6 +13247,8 @@ unicode_endswith(PyObject *self,
                 return NULL;
             result = tailmatch(self, substring, start, end, +1);
             Py_DECREF(substring);
+            if (result == -1)
+                return NULL;
             if (result) {
                 Py_RETURN_TRUE;
             }
@@ -12846,33 +13264,48 @@ unicode_endswith(PyObject *self,
     }
     result = tailmatch(self, substring, start, end, +1);
     Py_DECREF(substring);
+    if (result == -1)
+        return NULL;
     return PyBool_FromLong(result);
 }
 
 Py_LOCAL_INLINE(void)
 _PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
 {
-    writer->size = PyUnicode_GET_LENGTH(writer->buffer);
+    if (!writer->readonly)
+        writer->size = PyUnicode_GET_LENGTH(writer->buffer);
+    else {
+        /* Copy-on-write mode: set buffer size to 0 so
+         * _PyUnicodeWriter_Prepare() will copy (and enlarge) the buffer on
+         * next write. */
+        writer->size = 0;
+    }
     writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
     writer->data = PyUnicode_DATA(writer->buffer);
     writer->kind = PyUnicode_KIND(writer->buffer);
 }
 
 void
-_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length)
+_PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
 {
     memset(writer, 0, sizeof(*writer));
 #ifdef Py_DEBUG
     writer->kind = 5;    /* invalid kind */
 #endif
-    writer->min_length = Py_MAX(min_length, 100);
-    writer->overallocate = (min_length > 0);
+    writer->min_char = 127;
 }
 
 int
 _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
                                  Py_ssize_t length, Py_UCS4 maxchar)
 {
+#ifdef MS_WINDOWS
+   /* On Windows, overallocating by 50% is the best factor */
+#  define OVERALLOCATE_FACTOR 2
+#else
+   /* On Linux, overallocating by 25% is the best factor */
+#  define OVERALLOCATE_FACTOR 4
+#endif
     Py_ssize_t newlen;
     PyObject *newbuffer;
 
@@ -12884,32 +13317,34 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
     }
     newlen = writer->pos + length;
 
+    maxchar = Py_MAX(maxchar, writer->min_char);
+
     if (writer->buffer == NULL) {
-        if (writer->overallocate) {
-            /* overallocate 25% to limit the number of resize */
-            if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
-                newlen += newlen / 4;
-            if (newlen < writer->min_length)
-                newlen = writer->min_length;
+        assert(!writer->readonly);
+        if (writer->overallocate
+            && newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) {
+            /* overallocate to limit the number of realloc() */
+            newlen += newlen / OVERALLOCATE_FACTOR;
         }
+        if (newlen < writer->min_length)
+            newlen = writer->min_length;
+
         writer->buffer = PyUnicode_New(newlen, maxchar);
         if (writer->buffer == NULL)
             return -1;
-        _PyUnicodeWriter_Update(writer);
-        return 0;
     }
-
-    if (newlen > writer->size) {
-        if (writer->overallocate) {
-            /* overallocate 25% to limit the number of resize */
-            if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
-                newlen += newlen / 4;
-            if (newlen < writer->min_length)
-                newlen = writer->min_length;
+    else if (newlen > writer->size) {
+        if (writer->overallocate
+            && newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) {
+            /* overallocate to limit the number of realloc() */
+            newlen += newlen / OVERALLOCATE_FACTOR;
         }
+        if (newlen < writer->min_length)
+            newlen = writer->min_length;
 
         if (maxchar > writer->maxchar || writer->readonly) {
             /* resize + widen */
+            maxchar = Py_MAX(maxchar, writer->maxchar);
             newbuffer = PyUnicode_New(newlen, maxchar);
             if (newbuffer == NULL)
                 return -1;
@@ -12924,7 +13359,6 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
                 return -1;
         }
         writer->buffer = newbuffer;
-        _PyUnicodeWriter_Update(writer);
     }
     else if (maxchar > writer->maxchar) {
         assert(!writer->readonly);
@@ -12935,12 +13369,30 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
                                       writer->buffer, 0, writer->pos);
         Py_DECREF(writer->buffer);
         writer->buffer = newbuffer;
-        _PyUnicodeWriter_Update(writer);
     }
+    _PyUnicodeWriter_Update(writer);
+    return 0;
+
+#undef OVERALLOCATE_FACTOR
+}
+
+Py_LOCAL_INLINE(int)
+_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch)
+{
+    if (_PyUnicodeWriter_Prepare(writer, 1, ch) < 0)
+        return -1;
+    PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch);
+    writer->pos++;
     return 0;
 }
 
 int
+_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch)
+{
+    return _PyUnicodeWriter_WriteCharInline(writer, ch);
+}
+
+int
 _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
 {
     Py_UCS4 maxchar;
@@ -12954,11 +13406,10 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
     maxchar = PyUnicode_MAX_CHAR_VALUE(str);
     if (maxchar > writer->maxchar || len > writer->size - writer->pos) {
         if (writer->buffer == NULL && !writer->overallocate) {
+            writer->readonly = 1;
             Py_INCREF(str);
             writer->buffer = str;
             _PyUnicodeWriter_Update(writer);
-            writer->readonly = 1;
-            writer->size = 0;
             writer->pos += len;
             return 0;
         }
@@ -12971,28 +13422,142 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
     return 0;
 }
 
+int
+_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
+                                Py_ssize_t start, Py_ssize_t end)
+{
+    Py_UCS4 maxchar;
+    Py_ssize_t len;
+    /* Append str[start:end] to writer.  Return 0 on success, -1 on error. */
+    if (PyUnicode_READY(str) == -1)
+        return -1;
+    /* Caller contract, checked by assert only: 0 <= start <= end <= length. */
+    assert(0 <= start);
+    assert(end <= PyUnicode_GET_LENGTH(str));
+    assert(start <= end);
+
+    if (end == 0)   /* with start <= end this is an empty slice */
+        return 0;
+    /* Whole string requested: hand over to _PyUnicodeWriter_WriteStr(). */
+    if (start == 0 && end == PyUnicode_GET_LENGTH(str))
+        return _PyUnicodeWriter_WriteStr(writer, str);
+    /* Scan the slice itself only if str's max char exceeds the writer's. */
+    if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar)
+        maxchar = _PyUnicode_FindMaxChar(str, start, end);
+    else
+        maxchar = writer->maxchar;
+    len = end - start;
+
+    if (_PyUnicodeWriter_Prepare(writer, len, maxchar) < 0)
+        return -1;
+
+    _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+                                  str, start, len);
+    writer->pos += len;
+    return 0;
+}
+/* Append an ASCII string to writer.  Return 0 on success, -1 on error. */
+int
+_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
+                                  const char *ascii, Py_ssize_t len)
+{
+    if (len == -1)      /* -1: ascii is NUL-terminated, measure it */
+        len = strlen(ascii);
+    /* ASCII-only input is required, but checked solely by this assert. */
+    assert(ucs1lib_find_max_char((Py_UCS1*)ascii, (Py_UCS1*)ascii + len) < 128);
+    /* No buffer yet, no overallocation: adopt a new str as read-only buffer. */
+    if (writer->buffer == NULL && !writer->overallocate) {
+        PyObject *str;
+
+        str = _PyUnicode_FromASCII(ascii, len);
+        if (str == NULL)
+            return -1;
+
+        writer->readonly = 1;
+        writer->buffer = str;
+        _PyUnicodeWriter_Update(writer);
+        writer->pos += len;
+        return 0;
+    }
+    /* Otherwise reserve room (maxchar 127) and copy according to buffer kind. */
+    if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
+        return -1;
+
+    switch (writer->kind)
+    {
+    case PyUnicode_1BYTE_KIND:
+    {
+        const Py_UCS1 *str = (const Py_UCS1 *)ascii;
+        Py_UCS1 *data = writer->data;
+        /* 1-byte buffer: the bytes are copied unchanged. */
+        Py_MEMCPY(data + writer->pos, str, len);
+        break;
+    }
+    case PyUnicode_2BYTE_KIND:
+    {
+        _PyUnicode_CONVERT_BYTES(
+            Py_UCS1, Py_UCS2,
+            ascii, ascii + len,
+            (Py_UCS2 *)writer->data + writer->pos);
+        break;
+    }
+    case PyUnicode_4BYTE_KIND:
+    {
+        _PyUnicode_CONVERT_BYTES(
+            Py_UCS1, Py_UCS4,
+            ascii, ascii + len,
+            (Py_UCS4 *)writer->data + writer->pos);
+        break;
+    }
+    default:
+        assert(0);      /* writer->kind is none of the three kinds above */
+    }
+
+    writer->pos += len;
+    return 0;
+}
+/* Append len bytes of str to writer.  Return 0 on success, -1 on error. */
+int
+_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
+                                   const char *str, Py_ssize_t len)
+{
+    Py_UCS4 maxchar;
+    /* len is used as given: -1 is not special-cased here (no strlen). */
+    maxchar = ucs1lib_find_max_char((Py_UCS1*)str, (Py_UCS1*)str + len);
+    if (_PyUnicodeWriter_Prepare(writer, len, maxchar) == -1)
+        return -1;
+    unicode_write_cstr(writer->buffer, writer->pos, str, len);
+    writer->pos += len;
+    return 0;
+}
+
 PyObject *
 _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
 {
+    PyObject *str;
     if (writer->pos == 0) {
-        Py_XDECREF(writer->buffer);
+        Py_CLEAR(writer->buffer);
         _Py_RETURN_UNICODE_EMPTY();
     }
     if (writer->readonly) {
-        assert(PyUnicode_GET_LENGTH(writer->buffer) == writer->pos);
-        return writer->buffer;
+        str = writer->buffer;
+        writer->buffer = NULL;
+        assert(PyUnicode_GET_LENGTH(str) == writer->pos);
+        return str;
     }
     if (PyUnicode_GET_LENGTH(writer->buffer) != writer->pos) {
         PyObject *newbuffer;
         newbuffer = resize_compact(writer->buffer, writer->pos);
         if (newbuffer == NULL) {
-            Py_DECREF(writer->buffer);
+            Py_CLEAR(writer->buffer);
             return NULL;
         }
         writer->buffer = newbuffer;
     }
-    assert(_PyUnicode_CheckConsistency(writer->buffer, 1));
-    return unicode_result_ready(writer->buffer);
+    str = writer->buffer;
+    writer->buffer = NULL;
+    assert(_PyUnicode_CheckConsistency(str, 1));
+    return unicode_result_ready(str);
 }
 
 void
@@ -13027,7 +13592,7 @@ unicode__format__(PyObject* self, PyObject* args)
 
     if (PyUnicode_READY(self) == -1)
         return NULL;
-    _PyUnicodeWriter_Init(&writer, 0);
+    _PyUnicodeWriter_Init(&writer);
     ret = _PyUnicode_FormatAdvancedWriter(&writer,
                                           self, format_spec, 0,
                                           PyUnicode_GET_LENGTH(format_spec));
@@ -13096,7 +13661,8 @@ static PyMethodDef unicode_methods[] = {
     {"title", (PyCFunction) unicode_title, METH_NOARGS, title__doc__},
     {"center", (PyCFunction) unicode_center, METH_VARARGS, center__doc__},
     {"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__},
-    {"expandtabs", (PyCFunction) unicode_expandtabs, METH_VARARGS, expandtabs__doc__},
+    {"expandtabs", (PyCFunction) unicode_expandtabs,
+     METH_VARARGS | METH_KEYWORDS, expandtabs__doc__},
     {"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__},
     {"partition", (PyCFunction) unicode_partition, METH_O, partition__doc__},
     {"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__},
@@ -13108,7 +13674,8 @@ static PyMethodDef unicode_methods[] = {
     {"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__},
     {"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS, rstrip__doc__},
     {"rpartition", (PyCFunction) unicode_rpartition, METH_O, rpartition__doc__},
-    {"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS | METH_KEYWORDS, splitlines__doc__},
+    {"splitlines", (PyCFunction) unicode_splitlines,
+     METH_VARARGS | METH_KEYWORDS, splitlines__doc__},
     {"strip", (PyCFunction) unicode_strip, METH_VARARGS, strip__doc__},
     {"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__},
     {"translate", (PyCFunction) unicode_translate, METH_O, translate__doc__},
@@ -13130,8 +13697,7 @@ static PyMethodDef unicode_methods[] = {
     {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
     {"format_map", (PyCFunction) do_string_format_map, METH_O, format_map__doc__},
     {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__},
-    {"maketrans", (PyCFunction) unicode_maketrans,
-     METH_VARARGS | METH_STATIC, maketrans__doc__},
+    UNICODE_MAKETRANS_METHODDEF
     {"__sizeof__", (PyCFunction) unicode__sizeof__, METH_NOARGS, sizeof__doc__},
 #if 0
     /* These methods are just used for debugging the implementation. */
@@ -13246,16 +13812,39 @@ static PyMappingMethods unicode_as_mapping = {
 
 /* Helpers for PyUnicode_Format() */
 
+struct unicode_formatter_t {    /* context for the PyUnicode_Format() helpers */
+    PyObject *args;             /* returned whole if arglen < 0, else read with PyTuple_GetItem() */
+    int args_owned;             /* NOTE(review): looks like an ownership flag for args -- confirm */
+    Py_ssize_t arglen, argidx;  /* argidx: next item to hand out; must be < arglen */
+    PyObject *dict;             /* NOTE(review): not read in the visible helpers -- confirm use */
+    /* NOTE(review): fmt* presumably track the format string being scanned -- confirm. */
+    enum PyUnicode_Kind fmtkind;
+    Py_ssize_t fmtcnt, fmtpos;
+    void *fmtdata;
+    PyObject *fmtstr;
+
+    _PyUnicodeWriter writer;    /* NOTE(review): presumably collects the formatted output -- confirm */
+};
+
+struct unicode_format_arg_t {   /* per-conversion settings read by formatfloat()/formatlong() */
+    Py_UCS4 ch;         /* conversion code; formatlong() switches on it (d, i, u, o, x, X) */
+    int flags;          /* bit set; F_ALT is tested by formatfloat() and formatlong() */
+    Py_ssize_t width;   /* NOTE(review): not read in the visible helpers; presumably field width */
+    int prec;           /* precision; formatfloat() replaces a negative value with 6 */
+    int sign;           /* NOTE(review): not read in the visible helpers -- confirm meaning */
+};
+
 static PyObject *
-getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
+unicode_format_getnextarg(struct unicode_formatter_t *ctx)
 {
-    Py_ssize_t argidx = *p_argidx;
-    if (argidx < arglen) {
-        (*p_argidx)++;
-        if (arglen < 0)
-            return args;
+    Py_ssize_t argidx = ctx->argidx;
+
+    if (argidx < ctx->arglen) {
+        ctx->argidx++;
+        if (ctx->arglen < 0)
+            return ctx->args;
         else
-            return PyTuple_GetItem(args, argidx);
+            return PyTuple_GetItem(ctx->args, argidx);
     }
     PyErr_SetString(PyExc_TypeError,
                     "not enough arguments for format string");
@@ -13264,33 +13853,42 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
 
 /* Returns a new reference to a PyUnicode object, or NULL on failure. */
 
+/* Format a float into the writer if the writer is not NULL, or into *p_output
+   otherwise.
+
+   Return 0 on success, raise an exception and return -1 on error. */
 static int
-formatfloat(PyObject *v, int flags, int prec, int type,
-            PyObject **p_output, _PyUnicodeWriter *writer)
+formatfloat(PyObject *v, struct unicode_format_arg_t *arg,
+            PyObject **p_output,
+            _PyUnicodeWriter *writer)
 {
     char *p;
     double x;
     Py_ssize_t len;
+    int prec;
+    int dtoa_flags;
 
     x = PyFloat_AsDouble(v);
     if (x == -1.0 && PyErr_Occurred())
         return -1;
 
+    prec = arg->prec;
     if (prec < 0)
         prec = 6;
 
-    p = PyOS_double_to_string(x, type, prec,
-                              (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
+    if (arg->flags & F_ALT)
+        dtoa_flags = Py_DTSF_ALT;
+    else
+        dtoa_flags = 0;
+    p = PyOS_double_to_string(x, arg->ch, prec, dtoa_flags, NULL);
     if (p == NULL)
         return -1;
     len = strlen(p);
     if (writer) {
-        if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1) {
+        if (_PyUnicodeWriter_WriteASCIIString(writer, p, len) < 0) {
             PyMem_Free(p);
             return -1;
         }
-        unicode_write_cstr(writer->buffer, writer->pos, p, len);
-        writer->pos += len;
     }
     else
         *p_output = _PyUnicode_FromASCII(p, len);
@@ -13317,7 +13915,7 @@ formatfloat(PyObject *v, int flags, int prec, int type,
  * produce a '-' sign, but can for Python's unbounded ints.
  */
 static PyObject*
-formatlong(PyObject *val, int flags, int prec, int type)
+formatlong(PyObject *val, struct unicode_format_arg_t *arg)
 {
     PyObject *result = NULL;
     char *buf;
@@ -13327,6 +13925,8 @@ formatlong(PyObject *val, int flags, int prec, int type)
     Py_ssize_t llen;
     int numdigits;      /* len == numnondigits + numdigits */
     int numnondigits = 0;
+    int prec = arg->prec;
+    int type = arg->ch;         /* conversion character: one of "diuoxX" */
 
     /* Avoid exceeding SSIZE_T_MAX */
     if (prec > INT_MAX-3) {
@@ -13338,13 +13938,14 @@ formatlong(PyObject *val, int flags, int prec, int type)
     assert(PyLong_Check(val));
 
     switch (type) {
+    default:
+        assert(!"'type' not in [diuoxX]");  /* with asserts off, falls through to 'd' */
     case 'd':
+    case 'i':
     case 'u':
-        /* Special-case boolean: we want 0/1 */
-        if (PyBool_Check(val))
-            result = PyNumber_ToBase(val, 10);
-        else
-            result = Py_TYPE(val)->tp_str(val);
+        /* int and int subclasses should print numerically when a numeric */
+        /* format code is used, bypassing their tp_str (see issue18780) */
+        result = PyNumber_ToBase(val, 10);
         break;
     case 'o':
         numnondigits = 2;
@@ -13355,8 +13956,6 @@ formatlong(PyObject *val, int flags, int prec, int type)
         numnondigits = 2;
         result = PyNumber_ToBase(val, 16);
         break;
-    default:
-        assert(!"'type' not in [duoxX]");
     }
     if (!result)
         return NULL;
@@ -13367,12 +13966,14 @@ formatlong(PyObject *val, int flags, int prec, int type)
 
     /* To modify the string in-place, there can only be one reference. */
     if (Py_REFCNT(result) != 1) {
+        Py_DECREF(result);  /* give up our reference before reporting the error */
         PyErr_BadInternalCall();
         return NULL;
     }
     buf = PyUnicode_DATA(result);
     llen = PyUnicode_GET_LENGTH(result);
     if (llen > INT_MAX) {
+        Py_DECREF(result);  /* give up our reference before reporting the error */
         PyErr_SetString(PyExc_ValueError,
                         "string too large in _PyBytes_FormatLong");
         return NULL;
@@ -13384,7 +13985,7 @@ formatlong(PyObject *val, int flags, int prec, int type)
     assert(numdigits > 0);
 
     /* Get rid of base marker unless F_ALT */
-    if (((flags & F_ALT) == 0 &&
+    if (((arg->flags & F_ALT) == 0 &&
         (type == 'o' || type == 'x' || type == 'X'))) {
         assert(buf[sign] == '0');
         assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
@@ -13429,15 +14030,121 @@ formatlong(PyObject *val, int flags, int prec, int type)
             if (buf[i] >= 'a' && buf[i] <= 'x')
                 buf[i] -= 'a'-'A';
     }
-    if (!PyUnicode_Check(result) || len != PyUnicode_GET_LENGTH(result)) {
+    if (!PyUnicode_Check(result)
+        || buf != PyUnicode_DATA(result)) {  /* buf is not result's own storage: rebuild from buf */
         PyObject *unicode;
         unicode = _PyUnicode_FromASCII(buf, len);
         Py_DECREF(result);
         result = unicode;
     }
+    else if (len != PyUnicode_GET_LENGTH(result)) {  /* buf is result's storage; only the length differs */
+        if (PyUnicode_Resize(&result, len) < 0)
+            Py_CLEAR(result);  /* result becomes NULL, which is returned as the error */
+    }
     return result;
 }
 
+/* Format v as an integer for the conversion arg->ch (one of "diuoxX").
+ * Return 1 if the number has been formatted into the writer,
+ *        0 if the number has been formatted into *p_output
+ *       -1 and raise an exception on error */
+static int
+mainformatlong(PyObject *v,
+               struct unicode_format_arg_t *arg,
+               PyObject **p_output,
+               _PyUnicodeWriter *writer)
+{
+    PyObject *iobj, *res;
+    char type = (char)arg->ch;
+
+    if (!PyNumber_Check(v))
+        goto wrongtype;
+
+    /* Convert non-int numbers: "oxX" try PyNumber_Index(), then warn and */
+    /* fall back to PyNumber_Long(); "diu" use PyNumber_Long() directly. */
+    if (!PyLong_Check(v)) {
+        if (type == 'o' || type == 'x' || type == 'X') {
+            iobj = PyNumber_Index(v);
+            if (iobj == NULL) {
+                PyErr_Clear();  /* drops the PyNumber_Index() error, whatever its type */
+                if (PyErr_WarnEx(PyExc_DeprecationWarning,
+                                 "automatic int conversions have been deprecated",
+                                 1)) {
+                    return -1;  /* PyErr_WarnEx() reported an error */
+                }
+                iobj = PyNumber_Long(v);
+                if (iobj == NULL ) {
+                    if (PyErr_ExceptionMatches(PyExc_TypeError))
+                        goto wrongtype;  /* replaced by the "%c format" message below */
+                    return -1;
+                }
+            }
+        }
+        else {
+            iobj = PyNumber_Long(v);
+            if (iobj == NULL ) {
+                if (PyErr_ExceptionMatches(PyExc_TypeError))
+                    goto wrongtype;  /* replaced by the "%c format" message below */
+                return -1;
+            }
+        }
+        assert(PyLong_Check(iobj));
+    }
+    else {
+        iobj = v;
+        Py_INCREF(iobj);  /* every path below releases exactly one reference to iobj */
+    }
+
+    if (PyLong_CheckExact(v)
+        && arg->width == -1 && arg->prec == -1
+        && !(arg->flags & (F_SIGN | F_BLANK))
+        && type != 'X')
+    {
+        /* Fast path: exact int, no width/precision/sign flags; iobj is v here */
+        int alternate = arg->flags & F_ALT;
+        int base;
+
+        switch(type)
+        {
+            default:
+                assert(0 && "'type' not in [diuoxX]");  /* with asserts off, falls through to 'd' */
+            case 'd':
+            case 'i':
+            case 'u':
+                base = 10;
+                break;
+            case 'o':
+                base = 8;
+                break;
+            case 'x':
+            case 'X':  /* not reached: 'X' is excluded by the fast-path test above */
+                base = 16;
+                break;
+        }
+
+        if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) {
+            Py_DECREF(iobj);
+            return -1;
+        }
+        Py_DECREF(iobj);
+        return 1;  /* digits went straight into the writer */
+    }
+
+    res = formatlong(iobj, arg);
+    Py_DECREF(iobj);
+    if (res == NULL)
+        return -1;
+    *p_output = res;  /* hand the formatted string to the caller */
+    return 0;
+
+wrongtype:
+    PyErr_Format(PyExc_TypeError,
+            "%%%c format: a number is required, "
+            "not %.200s",
+            type, Py_TYPE(v)->tp_name);
+    return -1;
+}
+
 static Py_UCS4
 formatchar(PyObject *v)
 {
@@ -13449,8 +14156,30 @@ formatchar(PyObject *v)
         goto onError;
     }
     else {
-        /* Integer input truncated to a character */
+        PyObject *iobj = NULL;  /* owned int conversion of v; NULL when v is already an int */
         long x;
+        /* Non-int arguments go through PyNumber_Index(); if that fails, warn */
+        /* (DeprecationWarning) and fall back to PyNumber_Long() for now. */
+        if (!PyLong_Check(v)) {
+            iobj = PyNumber_Index(v);
+            if (iobj == NULL) {
+                PyErr_Clear();  /* drops the PyNumber_Index() error, whatever its type */
+                if (PyErr_WarnEx(PyExc_DeprecationWarning,
+                                 "automatic int conversions have been deprecated",
+                                 1)) {
+                    return (Py_UCS4) -1;
+                }
+                iobj = PyNumber_Long(v);
+                if (iobj == NULL ) {
+                    if (PyErr_ExceptionMatches(PyExc_TypeError))
+                        goto onError;
+                    return (Py_UCS4) -1;
+                }
+            }
+            v = iobj;  /* kept alive by iobj until the value has been read below */
+        }
+        /* Integer input truncated to a character */
         x = PyLong_AsLong(v);
+        Py_XDECREF(iobj);  /* release only after the read: avoids a use-after-free */
         if (x == -1 && PyErr_Occurred())
             goto onError;
@@ -13470,540 +14199,588 @@ formatchar(PyObject *v)
     return (Py_UCS4) -1;
 }
 
-PyObject *
-PyUnicode_Format(PyObject *format, PyObject *args)
-{
-    Py_ssize_t fmtcnt, fmtpos, arglen, argidx;
-    int args_owned = 0;
-    PyObject *dict = NULL;
-    PyObject *temp = NULL;
-    PyObject *second = NULL;
-    PyObject *uformat;
-    void *fmt;
-    enum PyUnicode_Kind kind, fmtkind;
-    _PyUnicodeWriter writer;
-    Py_ssize_t sublen;
-    Py_UCS4 maxchar;
+/* Parse options of an argument: flags, width, precision.
+   Handle also "%(name)" syntax.
 
-    if (format == NULL || args == NULL) {
-        PyErr_BadInternalCall();
-        return NULL;
-    }
-    uformat = PyUnicode_FromObject(format);
-    if (uformat == NULL)
-        return NULL;
-    if (PyUnicode_READY(uformat) == -1) {
-        Py_DECREF(uformat);
-        return NULL;
-    }
+   This function only parses; nothing is formatted or written here.
+   Return 0 on success (arg->ch then holds the conversion character).
+   Raise an exception and return -1 on error. */
+static int
+unicode_format_arg_parse(struct unicode_formatter_t *ctx,
+                         struct unicode_format_arg_t *arg)
+{
+#define FORMAT_READ(ctx) \
+        PyUnicode_READ((ctx)->fmtkind, (ctx)->fmtdata, (ctx)->fmtpos)
 
-    fmt = PyUnicode_DATA(uformat);
-    fmtkind = PyUnicode_KIND(uformat);
-    fmtcnt = PyUnicode_GET_LENGTH(uformat);
-    fmtpos = 0;
+    PyObject *v;
 
-    _PyUnicodeWriter_Init(&writer, fmtcnt + 100);
+    if (arg->ch == '(') {
+        /* Get argument value from a dictionary. Example: "%(name)s". */
+        Py_ssize_t keystart;
+        Py_ssize_t keylen;
+        PyObject *key;
+        int pcount = 1;
 
-    if (PyTuple_Check(args)) {
-        arglen = PyTuple_Size(args);
-        argidx = 0;
-    }
-    else {
-        arglen = -1;
-        argidx = -2;
+        if (ctx->dict == NULL) {
+            PyErr_SetString(PyExc_TypeError,
+                            "format requires a mapping");
+            return -1;
+        }
+        ++ctx->fmtpos;
+        --ctx->fmtcnt;
+        keystart = ctx->fmtpos;
+        /* Skip over balanced parentheses */
+        while (pcount > 0 && --ctx->fmtcnt >= 0) {
+            arg->ch = FORMAT_READ(ctx);
+            if (arg->ch == ')')
+                --pcount;
+            else if (arg->ch == '(')
+                ++pcount;
+            ctx->fmtpos++;
+        }
+        keylen = ctx->fmtpos - keystart - 1;
+        if (ctx->fmtcnt < 0 || pcount > 0) {
+            PyErr_SetString(PyExc_ValueError,
+                            "incomplete format key");
+            return -1;
+        }
+        key = PyUnicode_Substring(ctx->fmtstr,
+                                  keystart, keystart + keylen);
+        if (key == NULL)
+            return -1;
+        if (ctx->args_owned) {
+            Py_DECREF(ctx->args);
+            ctx->args_owned = 0;
+        }
+        ctx->args = PyObject_GetItem(ctx->dict, key);
+        Py_DECREF(key);
+        if (ctx->args == NULL)
+            return -1;
+        ctx->args_owned = 1;
+        ctx->arglen = -1;
+        ctx->argidx = -2;
+    }
+
+    /* Parse flags. Example: "%+i" => flags=F_SIGN. */
+    while (--ctx->fmtcnt >= 0) {
+        arg->ch = FORMAT_READ(ctx);
+        ctx->fmtpos++;
+        switch (arg->ch) {
+        case '-': arg->flags |= F_LJUST; continue;
+        case '+': arg->flags |= F_SIGN; continue;
+        case ' ': arg->flags |= F_BLANK; continue;
+        case '#': arg->flags |= F_ALT; continue;
+        case '0': arg->flags |= F_ZERO; continue;
+        }
+        break;
     }
-    if (PyMapping_Check(args) && !PyTuple_Check(args) && !PyUnicode_Check(args))
-        dict = args;
 
-    while (--fmtcnt >= 0) {
-        if (PyUnicode_READ(fmtkind, fmt, fmtpos) != '%') {
-            Py_ssize_t nonfmtpos;
-            nonfmtpos = fmtpos++;
-            while (fmtcnt >= 0 &&
-                   PyUnicode_READ(fmtkind, fmt, fmtpos) != '%') {
-                fmtpos++;
-                fmtcnt--;
-            }
-            if (fmtcnt < 0)
-                fmtpos--;
-            sublen = fmtpos - nonfmtpos;
-            maxchar = _PyUnicode_FindMaxChar(uformat,
-                                             nonfmtpos, nonfmtpos + sublen);
-            if (_PyUnicodeWriter_Prepare(&writer, sublen, maxchar) == -1)
-                goto onError;
-
-            _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos,
-                                          uformat, nonfmtpos, sublen);
-            writer.pos += sublen;
+    /* Parse width. Example: "%10s" => width=10 */
+    if (arg->ch == '*') {
+        v = unicode_format_getnextarg(ctx);
+        if (v == NULL)
+            return -1;
+        if (!PyLong_Check(v)) {
+            PyErr_SetString(PyExc_TypeError,
+                            "* wants int");
+            return -1;
         }
-        else {
-            /* Got a format specifier */
-            int flags = 0;
-            Py_ssize_t width = -1;
-            int prec = -1;
-            Py_UCS4 c = '\0';
-            Py_UCS4 fill;
-            int sign;
-            Py_UCS4 signchar;
-            int isnumok;
-            PyObject *v = NULL;
-            void *pbuf = NULL;
-            Py_ssize_t pindex, len;
-            Py_UCS4 bufmaxchar;
-            Py_ssize_t buflen;
-
-            fmtpos++;
-            c = PyUnicode_READ(fmtkind, fmt, fmtpos);
-            if (c == '(') {
-                Py_ssize_t keystart;
-                Py_ssize_t keylen;
-                PyObject *key;
-                int pcount = 1;
-
-                if (dict == NULL) {
-                    PyErr_SetString(PyExc_TypeError,
-                                    "format requires a mapping");
-                    goto onError;
-                }
-                ++fmtpos;
-                --fmtcnt;
-                keystart = fmtpos;
-                /* Skip over balanced parentheses */
-                while (pcount > 0 && --fmtcnt >= 0) {
-                    c = PyUnicode_READ(fmtkind, fmt, fmtpos);
-                    if (c == ')')
-                        --pcount;
-                    else if (c == '(')
-                        ++pcount;
-                    fmtpos++;
-                }
-                keylen = fmtpos - keystart - 1;
-                if (fmtcnt < 0 || pcount > 0) {
-                    PyErr_SetString(PyExc_ValueError,
-                                    "incomplete format key");
-                    goto onError;
-                }
-                key = PyUnicode_Substring(uformat,
-                                          keystart, keystart + keylen);
-                if (key == NULL)
-                    goto onError;
-                if (args_owned) {
-                    Py_DECREF(args);
-                    args_owned = 0;
-                }
-                args = PyObject_GetItem(dict, key);
-                Py_DECREF(key);
-                if (args == NULL) {
-                    goto onError;
-                }
-                args_owned = 1;
-                arglen = -1;
-                argidx = -2;
-            }
-            while (--fmtcnt >= 0) {
-                c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
-                switch (c) {
-                case '-': flags |= F_LJUST; continue;
-                case '+': flags |= F_SIGN; continue;
-                case ' ': flags |= F_BLANK; continue;
-                case '#': flags |= F_ALT; continue;
-                case '0': flags |= F_ZERO; continue;
-                }
+        arg->width = PyLong_AsSsize_t(v);
+        if (arg->width == -1 && PyErr_Occurred())
+            return -1;
+        if (arg->width < 0) {
+            arg->flags |= F_LJUST;
+            arg->width = -arg->width;
+        }
+        if (--ctx->fmtcnt >= 0) {
+            arg->ch = FORMAT_READ(ctx);
+            ctx->fmtpos++;
+        }
+    }
+    else if (arg->ch >= '0' && arg->ch <= '9') {
+        arg->width = arg->ch - '0';
+        while (--ctx->fmtcnt >= 0) {
+            arg->ch = FORMAT_READ(ctx);
+            ctx->fmtpos++;
+            if (arg->ch < '0' || arg->ch > '9')
                 break;
+            /* Since arg->ch is unsigned, the RHS would end up as unsigned,
+               mixing signed and unsigned comparison. Since arg->ch is between
+               '0' and '9', casting to int is safe. */
+            if (arg->width > (PY_SSIZE_T_MAX - ((int)arg->ch - '0')) / 10) {
+                PyErr_SetString(PyExc_ValueError,
+                                "width too big");
+                return -1;
             }
-            if (c == '*') {
-                v = getnextarg(args, arglen, &argidx);
-                if (v == NULL)
-                    goto onError;
-                if (!PyLong_Check(v)) {
-                    PyErr_SetString(PyExc_TypeError,
-                                    "* wants int");
-                    goto onError;
-                }
-                width = PyLong_AsSsize_t(v);
-                if (width == -1 && PyErr_Occurred())
-                    goto onError;
-                if (width < 0) {
-                    flags |= F_LJUST;
-                    width = -width;
-                }
-                if (--fmtcnt >= 0)
-                    c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
-            }
-            else if (c >= '0' && c <= '9') {
-                width = c - '0';
-                while (--fmtcnt >= 0) {
-                    c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
-                    if (c < '0' || c > '9')
-                        break;
-                    /* Since c is unsigned, the RHS would end up as unsigned,
-                       mixing signed and unsigned comparison. Since c is between
-                       '0' and '9', casting to int is safe. */
-                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
-                        PyErr_SetString(PyExc_ValueError,
-                                        "width too big");
-                        goto onError;
-                    }
-                    width = width*10 + (c - '0');
-                }
+            arg->width = arg->width*10 + (arg->ch - '0');
+        }
+    }
+
+    /* Parse precision. Example: "%.3f" => prec=3 */
+    if (arg->ch == '.') {
+        arg->prec = 0;
+        if (--ctx->fmtcnt >= 0) {
+            arg->ch = FORMAT_READ(ctx);
+            ctx->fmtpos++;
+        }
+        if (arg->ch == '*') {
+            v = unicode_format_getnextarg(ctx);
+            if (v == NULL)
+                return -1;
+            if (!PyLong_Check(v)) {
+                PyErr_SetString(PyExc_TypeError,
+                                "* wants int");
+                return -1;
             }
-            if (c == '.') {
-                prec = 0;
-                if (--fmtcnt >= 0)
-                    c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
-                if (c == '*') {
-                    v = getnextarg(args, arglen, &argidx);
-                    if (v == NULL)
-                        goto onError;
-                    if (!PyLong_Check(v)) {
-                        PyErr_SetString(PyExc_TypeError,
-                                        "* wants int");
-                        goto onError;
-                    }
-                    prec = _PyLong_AsInt(v);
-                    if (prec == -1 && PyErr_Occurred())
-                        goto onError;
-                    if (prec < 0)
-                        prec = 0;
-                    if (--fmtcnt >= 0)
-                        c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
-                }
-                else if (c >= '0' && c <= '9') {
-                    prec = c - '0';
-                    while (--fmtcnt >= 0) {
-                        c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
-                        if (c < '0' || c > '9')
-                            break;
-                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
-                            PyErr_SetString(PyExc_ValueError,
-                                            "prec too big");
-                            goto onError;
-                        }
-                        prec = prec*10 + (c - '0');
-                    }
-                }
-            } /* prec */
-            if (fmtcnt >= 0) {
-                if (c == 'h' || c == 'l' || c == 'L') {
-                    if (--fmtcnt >= 0)
-                        c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
+            arg->prec = _PyLong_AsInt(v);
+            if (arg->prec == -1 && PyErr_Occurred())
+                return -1;
+            if (arg->prec < 0)
+                arg->prec = 0;
+            if (--ctx->fmtcnt >= 0) {
+                arg->ch = FORMAT_READ(ctx);
+                ctx->fmtpos++;
+            }
+        }
+        else if (arg->ch >= '0' && arg->ch <= '9') {
+            arg->prec = arg->ch - '0';
+            while (--ctx->fmtcnt >= 0) {
+                arg->ch = FORMAT_READ(ctx);
+                ctx->fmtpos++;
+                if (arg->ch < '0' || arg->ch > '9')
+                    break;
+                if (arg->prec > (INT_MAX - ((int)arg->ch - '0')) / 10) {
+                    PyErr_SetString(PyExc_ValueError,
+                                    "precision too big");
+                    return -1;
                 }
+                arg->prec = arg->prec*10 + (arg->ch - '0');
             }
-            if (fmtcnt < 0) {
-                PyErr_SetString(PyExc_ValueError,
-                                "incomplete format");
-                goto onError;
-            }
-            if (fmtcnt == 0)
-                writer.overallocate = 0;
+        }
+    }
 
-            if (c == '%') {
-                if (_PyUnicodeWriter_Prepare(&writer, 1, '%') == -1)
-                    goto onError;
-                PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '%');
-                writer.pos += 1;
-                continue;
+    /* Ignore "h", "l" and "L" format prefix (ex: "%hi" or "%ls") */
+    if (ctx->fmtcnt >= 0) {
+        if (arg->ch == 'h' || arg->ch == 'l' || arg->ch == 'L') {
+            if (--ctx->fmtcnt >= 0) {
+                arg->ch = FORMAT_READ(ctx);
+                ctx->fmtpos++;
             }
+        }
+    }
+    if (ctx->fmtcnt < 0) {
+        PyErr_SetString(PyExc_ValueError,
+                        "incomplete format");
+        return -1;
+    }
+    return 0;
 
-            v = getnextarg(args, arglen, &argidx);
-            if (v == NULL)
-                goto onError;
+#undef FORMAT_READ
+}
 
-            sign = 0;
-            signchar = '\0';
-            fill = ' ';
-            switch (c) {
-
-            case 's':
-            case 'r':
-            case 'a':
-                if (PyLong_CheckExact(v) && width == -1 && prec == -1) {
-                    /* Fast path */
-                    if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1)
-                        goto onError;
-                    goto nextarg;
-                }
+/* Format one argument. Supported conversion specifiers:
 
-                if (PyUnicode_CheckExact(v) && c == 's') {
-                    temp = v;
-                    Py_INCREF(temp);
-                }
-                else {
-                    if (c == 's')
-                        temp = PyObject_Str(v);
-                    else if (c == 'r')
-                        temp = PyObject_Repr(v);
-                    else
-                        temp = PyObject_ASCII(v);
-                }
-                break;
+   - "s", "r", "a": any type
+   - "i", "d", "u": int or float
+   - "o", "x", "X": int
+   - "e", "E", "f", "F", "g", "G": float
+   - "c": int or str (1 character)
 
-            case 'i':
-            case 'd':
-            case 'u':
-            case 'o':
-            case 'x':
-            case 'X':
-                if (PyLong_CheckExact(v)
-                    && width == -1 && prec == -1
-                    && !(flags & (F_SIGN | F_BLANK)))
-                {
-                    /* Fast path */
-                    switch(c)
-                    {
-                    case 'd':
-                    case 'i':
-                    case 'u':
-                        if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1)
-                            goto onError;
-                        goto nextarg;
-                    case 'x':
-                        if (_PyLong_FormatWriter(&writer, v, 16, flags & F_ALT) == -1)
-                            goto onError;
-                        goto nextarg;
-                    case 'o':
-                        if (_PyLong_FormatWriter(&writer, v, 8, flags & F_ALT) == -1)
-                            goto onError;
-                        goto nextarg;
-                    default:
-                        break;
-                    }
-                }
+   When possible, the output is written directly into the Unicode writer
+   (ctx->writer). A string is created when padding is required.
 
-                isnumok = 0;
-                if (PyNumber_Check(v)) {
-                    PyObject *iobj=NULL;
+   Return 0 if the argument has been formatted into *p_str,
+          1 if the argument has been written into ctx->writer,
+         -1 on error. */
+static int
+unicode_format_arg_format(struct unicode_formatter_t *ctx,
+                          struct unicode_format_arg_t *arg,
+                          PyObject **p_str)
+{
+    PyObject *v;
+    _PyUnicodeWriter *writer = &ctx->writer;
 
-                    if (PyLong_Check(v)) {
-                        iobj = v;
-                        Py_INCREF(iobj);
-                    }
-                    else {
-                        iobj = PyNumber_Long(v);
-                    }
-                    if (iobj!=NULL) {
-                        if (PyLong_Check(iobj)) {
-                            isnumok = 1;
-                            sign = 1;
-                            temp = formatlong(iobj, flags, prec, (c == 'i'? 'd': c));
-                            Py_DECREF(iobj);
-                        }
-                        else {
-                            Py_DECREF(iobj);
-                        }
-                    }
-                }
-                if (!isnumok) {
-                    PyErr_Format(PyExc_TypeError,
-                                 "%%%c format: a number is required, "
-                                 "not %.200s", (char)c, Py_TYPE(v)->tp_name);
-                    goto onError;
-                }
-                if (flags & F_ZERO)
-                    fill = '0';
-                break;
+    if (ctx->fmtcnt == 0)
+        ctx->writer.overallocate = 0;
 
-            case 'e':
-            case 'E':
-            case 'f':
-            case 'F':
-            case 'g':
-            case 'G':
-                if (width == -1 && prec == -1
-                    && !(flags & (F_SIGN | F_BLANK)))
-                {
-                    /* Fast path */
-                    if (formatfloat(v, flags, prec, c, NULL, &writer) == -1)
-                        goto onError;
-                    goto nextarg;
-                }
+    if (arg->ch == '%') {
+        if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0)
+            return -1;
+        return 1;
+    }
 
-                sign = 1;
-                if (flags & F_ZERO)
-                    fill = '0';
-                if (formatfloat(v, flags, prec, c, &temp, NULL) == -1)
-                    temp = NULL;
-                break;
+    v = unicode_format_getnextarg(ctx);
+    if (v == NULL)
+        return -1;
 
-            case 'c':
-            {
-                Py_UCS4 ch = formatchar(v);
-                if (ch == (Py_UCS4) -1)
-                    goto onError;
-                if (width == -1 && prec == -1) {
-                    /* Fast path */
-                    if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1)
-                        goto onError;
-                    PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch);
-                    writer.pos += 1;
-                    goto nextarg;
-                }
-                temp = PyUnicode_FromOrdinal(ch);
-                break;
-            }
 
-            default:
-                PyErr_Format(PyExc_ValueError,
-                             "unsupported format character '%c' (0x%x) "
-                             "at index %zd",
-                             (31<=c && c<=126) ? (char)c : '?',
-                             (int)c,
-                             fmtpos - 1);
-                goto onError;
-            }
-            if (temp == NULL)
-                goto onError;
-            assert (PyUnicode_Check(temp));
+    switch (arg->ch) {
+    case 's':
+    case 'r':
+    case 'a':
+        if (PyLong_CheckExact(v) && arg->width == -1 && arg->prec == -1) {
+            /* Fast path */
+            if (_PyLong_FormatWriter(writer, v, 10, arg->flags & F_ALT) == -1)
+                return -1;
+            return 1;
+        }
 
-            if (width == -1 && prec == -1
-                && !(flags & (F_SIGN | F_BLANK)))
-            {
-                /* Fast path */
-                if (_PyUnicodeWriter_WriteStr(&writer, temp) == -1)
-                    goto onError;
-                goto nextarg;
-            }
+        if (PyUnicode_CheckExact(v) && arg->ch == 's') {
+            *p_str = v;
+            Py_INCREF(*p_str);
+        }
+        else {
+            if (arg->ch == 's')
+                *p_str = PyObject_Str(v);
+            else if (arg->ch == 'r')
+                *p_str = PyObject_Repr(v);
+            else
+                *p_str = PyObject_ASCII(v);
+        }
+        break;
 
-            if (PyUnicode_READY(temp) == -1) {
-                Py_CLEAR(temp);
-                goto onError;
-            }
-            kind = PyUnicode_KIND(temp);
-            pbuf = PyUnicode_DATA(temp);
-            len = PyUnicode_GET_LENGTH(temp);
+    case 'i':
+    case 'd':
+    case 'u':
+    case 'o':
+    case 'x':
+    case 'X':
+    {
+        int ret = mainformatlong(v, arg, p_str, writer);
+        if (ret != 0)
+            return ret;
+        arg->sign = 1;
+        break;
+    }
 
-            if (c == 's' || c == 'r' || c == 'a') {
-                if (prec >= 0 && len > prec)
-                    len = prec;
-            }
+    case 'e':
+    case 'E':
+    case 'f':
+    case 'F':
+    case 'g':
+    case 'G':
+        if (arg->width == -1 && arg->prec == -1
+            && !(arg->flags & (F_SIGN | F_BLANK)))
+        {
+            /* Fast path */
+            if (formatfloat(v, arg, NULL, writer) == -1)
+                return -1;
+            return 1;
+        }
 
-            /* pbuf is initialized here. */
-            pindex = 0;
-            if (sign) {
-                Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
-                if (ch == '-' || ch == '+') {
-                    signchar = ch;
-                    len--;
-                    pindex++;
-                }
-                else if (flags & F_SIGN)
-                    signchar = '+';
-                else if (flags & F_BLANK)
-                    signchar = ' ';
-                else
-                    sign = 0;
-            }
-            if (width < len)
-                width = len;
-
-            /* Compute the length and maximum character of the
-               written characters */
-            bufmaxchar = 127;
-            if (!(flags & F_LJUST)) {
-                if (sign) {
-                    if ((width-1) > len)
-                        bufmaxchar = Py_MAX(bufmaxchar, fill);
-                }
-                else {
-                    if (width > len)
-                        bufmaxchar = Py_MAX(bufmaxchar, fill);
-                }
-            }
-            maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len);
-            bufmaxchar = Py_MAX(bufmaxchar, maxchar);
+        arg->sign = 1;
+        if (formatfloat(v, arg, p_str, NULL) == -1)
+            return -1;
+        break;
 
-            buflen = width;
-            if (sign && len == width)
-                buflen++;
+    case 'c':
+    {
+        Py_UCS4 ch = formatchar(v);
+        if (ch == (Py_UCS4) -1)
+            return -1;
+        if (arg->width == -1 && arg->prec == -1) {
+            /* Fast path */
+            if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0)
+                return -1;
+            return 1;
+        }
+        *p_str = PyUnicode_FromOrdinal(ch);
+        break;
+    }
 
-            if (_PyUnicodeWriter_Prepare(&writer, buflen, bufmaxchar) == -1)
-                goto onError;
+    default:
+        PyErr_Format(PyExc_ValueError,
+                     "unsupported format character '%c' (0x%x) "
+                     "at index %zd",
+                     (31<=arg->ch && arg->ch<=126) ? (char)arg->ch : '?',
+                     (int)arg->ch,
+                     ctx->fmtpos - 1);
+        return -1;
+    }
+    if (*p_str == NULL)
+        return -1;
+    assert (PyUnicode_Check(*p_str));
+    return 0;
+}
 
-            /* Write characters */
-            if (sign) {
-                if (fill != ' ') {
-                    PyUnicode_WRITE(writer.kind, writer.data, writer.pos, signchar);
-                    writer.pos += 1;
-                }
-                if (width > len)
-                    width--;
-            }
-            if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
-                assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
-                assert(PyUnicode_READ(kind, pbuf, pindex + 1) == c);
-                if (fill != ' ') {
-                    PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0');
-                    PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c);
-                    writer.pos += 2;
-                    pindex += 2;
-                }
-                width -= 2;
-                if (width < 0)
-                    width = 0;
-                len -= 2;
-            }
-            if (width > len && !(flags & F_LJUST)) {
-                sublen = width - len;
-                FILL(writer.kind, writer.data, fill, writer.pos, sublen);
-                writer.pos += sublen;
-                width = len;
-            }
-            if (fill == ' ') {
-                if (sign) {
-                    PyUnicode_WRITE(writer.kind, writer.data, writer.pos, signchar);
-                    writer.pos += 1;
-                }
-                if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
-                    assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
-                    assert(PyUnicode_READ(kind, pbuf, pindex+1) == c);
-                    PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0');
-                    PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c);
-                    writer.pos += 2;
-                    pindex += 2;
-                }
-            }
+static int
+unicode_format_arg_output(struct unicode_formatter_t *ctx,
+                          struct unicode_format_arg_t *arg,
+                          PyObject *str)
+{
+    Py_ssize_t len;
+    enum PyUnicode_Kind kind;
+    void *pbuf;
+    Py_ssize_t pindex;
+    Py_UCS4 signchar;
+    Py_ssize_t buflen;
+    Py_UCS4 maxchar;
+    Py_ssize_t sublen;
+    _PyUnicodeWriter *writer = &ctx->writer;
+    Py_UCS4 fill;
+
+    fill = ' ';
+    if (arg->sign && arg->flags & F_ZERO)
+        fill = '0';
+
+    if (PyUnicode_READY(str) == -1)
+        return -1;
+
+    len = PyUnicode_GET_LENGTH(str);
+    if ((arg->width == -1 || arg->width <= len)
+        && (arg->prec == -1 || arg->prec >= len)
+        && !(arg->flags & (F_SIGN | F_BLANK)))
+    {
+        /* Fast path */
+        if (_PyUnicodeWriter_WriteStr(writer, str) == -1)
+            return -1;
+        return 0;
+    }
 
-            if (len) {
-                _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos,
-                                              temp, pindex, len);
-                writer.pos += len;
+    /* Truncate the string for "s", "r" and "a" formats
+       if the precision is set */
+    if (arg->ch == 's' || arg->ch == 'r' || arg->ch == 'a') {
+        if (arg->prec >= 0 && len > arg->prec)
+            len = arg->prec;
+    }
+
+    /* Adjust sign and width */
+    kind = PyUnicode_KIND(str);
+    pbuf = PyUnicode_DATA(str);
+    pindex = 0;
+    signchar = '\0';
+    if (arg->sign) {
+        Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
+        if (ch == '-' || ch == '+') {
+            signchar = ch;
+            len--;
+            pindex++;
+        }
+        else if (arg->flags & F_SIGN)
+            signchar = '+';
+        else if (arg->flags & F_BLANK)
+            signchar = ' ';
+        else
+            arg->sign = 0;
+    }
+    if (arg->width < len)
+        arg->width = len;
+
+    /* Prepare the writer */
+    maxchar = writer->maxchar;
+    if (!(arg->flags & F_LJUST)) {
+        if (arg->sign) {
+            if ((arg->width-1) > len)
+                maxchar = Py_MAX(maxchar, fill);
+        }
+        else {
+            if (arg->width > len)
+                maxchar = Py_MAX(maxchar, fill);
+        }
+    }
+    if (PyUnicode_MAX_CHAR_VALUE(str) > maxchar) {
+        Py_UCS4 strmaxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len);
+        maxchar = Py_MAX(maxchar, strmaxchar);
+    }
+
+    buflen = arg->width;
+    if (arg->sign && len == arg->width)
+        buflen++;
+    if (_PyUnicodeWriter_Prepare(writer, buflen, maxchar) == -1)
+        return -1;
+
+    /* Account for the sign in the width; write it now if zero-padding */
+    if (arg->sign) {
+        if (fill != ' ') {
+            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
+            writer->pos += 1;
+        }
+        if (arg->width > len)
+            arg->width--;
+    }
+
+    /* Write the numeric prefix for "x", "X" and "o" formats
+       if the alternate form is used.
+       For example, write "0x" for the "%#x" format. */
+    if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
+        assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
+        assert(PyUnicode_READ(kind, pbuf, pindex + 1) == arg->ch);
+        if (fill != ' ') {
+            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
+            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
+            writer->pos += 2;
+            pindex += 2;
+        }
+        arg->width -= 2;
+        if (arg->width < 0)
+            arg->width = 0;
+        len -= 2;
+    }
+
+    /* Pad left with the fill character if needed */
+    if (arg->width > len && !(arg->flags & F_LJUST)) {
+        sublen = arg->width - len;
+        FILL(writer->kind, writer->data, fill, writer->pos, sublen);
+        writer->pos += sublen;
+        arg->width = len;
+    }
+
+    /* If padding with spaces: write sign if needed and/or numeric prefix if
+       the alternate form is used */
+    if (fill == ' ') {
+        if (arg->sign) {
+            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar);
+            writer->pos += 1;
+        }
+        if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) {
+            assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
+            assert(PyUnicode_READ(kind, pbuf, pindex+1) == arg->ch);
+            PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0');
+            PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch);
+            writer->pos += 2;
+            pindex += 2;
+        }
+    }
+
+    /* Write characters */
+    if (len) {
+        _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+                                      str, pindex, len);
+        writer->pos += len;
+    }
+
+    /* Pad right with spaces if needed (only when left-justifying) */
+    if (arg->width > len) {
+        sublen = arg->width - len;
+        FILL(writer->kind, writer->data, ' ', writer->pos, sublen);
+        writer->pos += sublen;
+    }
+    return 0;
+}
+
+/* Helper of PyUnicode_Format(): format one arg.
+   Return 0 on success, raise an exception and return -1 on error. */
+static int
+unicode_format_arg(struct unicode_formatter_t *ctx)
+{
+    struct unicode_format_arg_t arg;
+    PyObject *str;
+    int ret;
+
+    arg.ch = PyUnicode_READ(ctx->fmtkind, ctx->fmtdata, ctx->fmtpos);
+    arg.flags = 0;
+    arg.width = -1;
+    arg.prec = -1;
+    arg.sign = 0;
+    str = NULL;
+
+    ret = unicode_format_arg_parse(ctx, &arg);
+    if (ret == -1)
+        return -1;
+
+    ret = unicode_format_arg_format(ctx, &arg, &str);
+    if (ret == -1)
+        return -1;
+
+    if (ret != 1) {
+        ret = unicode_format_arg_output(ctx, &arg, str);
+        Py_DECREF(str);
+        if (ret == -1)
+            return -1;
+    }
+
+    if (ctx->dict && (ctx->argidx < ctx->arglen) && arg.ch != '%') {
+        PyErr_SetString(PyExc_TypeError,
+                        "not all arguments converted during string formatting");
+        return -1;
+    }
+    return 0;
+}
+
+PyObject *
+PyUnicode_Format(PyObject *format, PyObject *args)
+{
+    struct unicode_formatter_t ctx;
+
+    if (format == NULL || args == NULL) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+
+    ctx.fmtstr = PyUnicode_FromObject(format);
+    if (ctx.fmtstr == NULL)
+        return NULL;
+    if (PyUnicode_READY(ctx.fmtstr) == -1) {
+        Py_DECREF(ctx.fmtstr);
+        return NULL;
+    }
+    ctx.fmtdata = PyUnicode_DATA(ctx.fmtstr);
+    ctx.fmtkind = PyUnicode_KIND(ctx.fmtstr);
+    ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr);
+    ctx.fmtpos = 0;
+
+    _PyUnicodeWriter_Init(&ctx.writer);
+    ctx.writer.min_length = ctx.fmtcnt + 100;
+    ctx.writer.overallocate = 1;
+
+    if (PyTuple_Check(args)) {
+        ctx.arglen = PyTuple_Size(args);
+        ctx.argidx = 0;
+    }
+    else {
+        ctx.arglen = -1;
+        ctx.argidx = -2;
+    }
+    ctx.args_owned = 0;
+    if (PyMapping_Check(args) && !PyTuple_Check(args) && !PyUnicode_Check(args))
+        ctx.dict = args;
+    else
+        ctx.dict = NULL;
+    ctx.args = args;
+
+    while (--ctx.fmtcnt >= 0) {
+        if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
+            Py_ssize_t nonfmtpos;
+
+            nonfmtpos = ctx.fmtpos++;
+            while (ctx.fmtcnt >= 0 &&
+                   PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
+                ctx.fmtpos++;
+                ctx.fmtcnt--;
             }
-            if (width > len) {
-                sublen = width - len;
-                FILL(writer.kind, writer.data, ' ', writer.pos, sublen);
-                writer.pos += sublen;
+            if (ctx.fmtcnt < 0) {
+                ctx.fmtpos--;
+                ctx.writer.overallocate = 0;
             }
 
-nextarg:
-            if (dict && (argidx < arglen) && c != '%') {
-                PyErr_SetString(PyExc_TypeError,
-                                "not all arguments converted during string formatting");
+            if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr,
+                                                nonfmtpos, ctx.fmtpos) < 0)
                 goto onError;
-            }
-            Py_CLEAR(temp);
-        } /* '%' */
-    } /* until end */
-    if (argidx < arglen && !dict) {
+        }
+        else {
+            ctx.fmtpos++;
+            if (unicode_format_arg(&ctx) == -1)
+                goto onError;
+        }
+    }
+
+    if (ctx.argidx < ctx.arglen && !ctx.dict) {
         PyErr_SetString(PyExc_TypeError,
                         "not all arguments converted during string formatting");
         goto onError;
     }
 
-    if (args_owned) {
-        Py_DECREF(args);
+    if (ctx.args_owned) {
+        Py_DECREF(ctx.args);
     }
-    Py_DECREF(uformat);
-    Py_XDECREF(temp);
-    Py_XDECREF(second);
-    return _PyUnicodeWriter_Finish(&writer);
+    Py_DECREF(ctx.fmtstr);
+    return _PyUnicodeWriter_Finish(&ctx.writer);
 
   onError:
-    Py_DECREF(uformat);
-    Py_XDECREF(temp);
-    Py_XDECREF(second);
-    _PyUnicodeWriter_Dealloc(&writer);
-    if (args_owned) {
-        Py_DECREF(args);
+    Py_DECREF(ctx.fmtstr);
+    _PyUnicodeWriter_Dealloc(&ctx.writer);
+    if (ctx.args_owned) {
+        Py_DECREF(ctx.args);
     }
     return NULL;
 }
@@ -14263,7 +15040,7 @@ _PyUnicode_Fini(void)
 void
 PyUnicode_InternInPlace(PyObject **p)
 {
-    register PyObject *s = *p;
+    PyObject *s = *p;
     PyObject *t;
 #ifdef Py_DEBUG
     assert(s != NULL);
@@ -14292,12 +15069,12 @@ PyUnicode_InternInPlace(PyObject **p)
     t = PyDict_GetItem(interned, s);
     Py_END_ALLOW_RECURSION
 
-        if (t) {
-            Py_INCREF(t);
-            Py_DECREF(*p);
-            *p = t;
-            return;
-        }
+    if (t) {
+        Py_INCREF(t);
+        Py_DECREF(*p);
+        *p = t;
+        return;
+    }
 
     PyThreadState_GET()->recursion_critical = 1;
     if (PyDict_SetItem(interned, s, s) < 0) {
@@ -14600,7 +15377,7 @@ Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2)
 int
 Py_UNICODE_strncmp(const Py_UNICODE *s1, const Py_UNICODE *s2, size_t n)
 {
-    register Py_UNICODE u1, u2;
+    Py_UNICODE u1, u2;
     for (; n != 0; n--) {
         u1 = *s1;
         u2 = *s2;
diff --git a/Objects/unicodetype_db.h b/Objects/unicodetype_db.h
index 46a92bb..1fdc092 100644
--- a/Objects/unicodetype_db.h
+++ b/Objects/unicodetype_db.h
@@ -1589,7 +1589,7 @@ static unsigned short index2[] = {
     55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 
     0, 0, 0, 55, 55, 55, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 
     21, 21, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 25, 25, 25, 25, 25, 25, 25, 25, 
-    25, 25, 25, 5, 0, 0, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
+    25, 25, 25, 5, 21, 0, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
     55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
     55, 55, 55, 96, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 25, 25, 
     25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 7, 8, 
@@ -1801,7 +1801,7 @@ static unsigned short index2[] = {
     25, 25, 25, 25, 25, 25, 25, 25, 5, 5, 5, 96, 5, 5, 5, 5, 55, 25, 0, 0, 7, 
     8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 27, 27, 27, 27, 27, 
     27, 27, 27, 27, 27, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
-    25, 25, 25, 2, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 
+    25, 25, 25, 21, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 
     55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
     55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 96, 
     55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
@@ -1828,7 +1828,7 @@ static unsigned short index2[] = {
     7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 132, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 
     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
     5, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
-    55, 55, 55, 55, 55, 55, 55, 25, 25, 18, 18, 18, 0, 0, 5, 5, 55, 55, 55, 
+    55, 55, 55, 55, 55, 55, 55, 25, 25, 18, 18, 25, 0, 0, 5, 5, 55, 55, 55, 
     55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
     55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
     55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 18, 25, 18, 25, 
@@ -1915,11 +1915,11 @@ static unsigned short index2[] = {
     5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 6, 3, 3, 21, 21, 21, 21, 21, 2, 5, 5, 
     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 18, 18, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 18, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 21, 
-    21, 21, 21, 21, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 245, 95, 0, 0, 
+    21, 21, 21, 21, 0, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 245, 95, 0, 0, 
     246, 247, 248, 249, 250, 251, 5, 5, 5, 5, 5, 95, 245, 26, 22, 23, 246, 
     247, 248, 249, 250, 251, 5, 5, 5, 5, 5, 0, 95, 95, 95, 95, 95, 95, 95, 
     95, 95, 95, 95, 95, 95, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 
     25, 25, 25, 6, 6, 6, 6, 25, 6, 6, 6, 25, 25, 25, 25, 25, 25, 25, 25, 25, 
     25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 113, 5, 5, 
@@ -2593,10 +2593,10 @@ static unsigned short index2[] = {
     0, 0, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 
     141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 
     141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 
-    141, 141, 141, 141, 141, 141, 141, 141, 141, 252, 252, 141, 141, 141, 
     141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 
     141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 
-    141, 141, 141, 252, 252, 141, 141, 141, 141, 141, 141, 141, 141, 141, 
+    141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 
+    141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 
     141, 141, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 0, 0, 0, 0, 
     0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
     55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
@@ -3380,6 +3380,7 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch)
     case 0x12435:
     case 0x1244A:
     case 0x12450:
+    case 0x12456:
     case 0x12459:
     case 0x1D361:
     case 0x1D7D0:
@@ -3427,6 +3428,8 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch)
         return (double) 20000.0;
     case 0x3251:
         return (double) 21.0;
+    case 0x12432:
+        return (double) 216000.0;
     case 0x3252:
         return (double) 22.0;
     case 0x3253:
@@ -3534,6 +3537,7 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch)
     case 0x1243B:
     case 0x1244B:
     case 0x12451:
+    case 0x12457:
     case 0x1D362:
     case 0x1D7D1:
     case 0x1D7DB:
@@ -3721,6 +3725,8 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch)
         return (double) 42.0;
     case 0x32B8:
         return (double) 43.0;
+    case 0x12433:
+        return (double) 432000.0;
     case 0x32B9:
         return (double) 44.0;
     case 0x32BA:
@@ -4271,7 +4277,7 @@ double _PyUnicode_ToNumeric(Py_UCS4 ch)
 /* Returns 1 for Unicode characters having the bidirectional
  * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise.
  */
-int _PyUnicode_IsWhitespace(register const Py_UCS4 ch)
+int _PyUnicode_IsWhitespace(const Py_UCS4 ch)
 {
     switch (ch) {
     case 0x0009:
@@ -4287,7 +4293,6 @@ int _PyUnicode_IsWhitespace(register const Py_UCS4 ch)
     case 0x0085:
     case 0x00A0:
     case 0x1680:
-    case 0x180E:
     case 0x2000:
     case 0x2001:
     case 0x2002:
@@ -4313,7 +4318,7 @@ int _PyUnicode_IsWhitespace(register const Py_UCS4 ch)
  * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional
  * type 'B', 0 otherwise.
  */
-int _PyUnicode_IsLinebreak(register const Py_UCS4 ch)
+int _PyUnicode_IsLinebreak(const Py_UCS4 ch)
 {
     switch (ch) {
     case 0x000A:
diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c
index b49dcee..d4d52e6 100644
--- a/Objects/weakrefobject.c
+++ b/Objects/weakrefobject.c
@@ -338,6 +338,11 @@ weakref___init__(PyObject *self, PyObject *args, PyObject *kwargs)
 }
 
 
+static PyMemberDef weakref_members[] = {
+    {"__callback__", T_OBJECT, offsetof(PyWeakReference, wr_callback), READONLY},
+    {NULL} /* Sentinel */
+};
+
 PyTypeObject
 _PyWeakref_RefType = {
     PyVarObject_HEAD_INIT(&PyType_Type, 0)
@@ -369,7 +374,7 @@ _PyWeakref_RefType = {
     0,                          /*tp_iter*/
     0,                          /*tp_iternext*/
     0,                          /*tp_methods*/
-    0,                          /*tp_members*/
+    weakref_members,            /*tp_members*/
     0,                          /*tp_getset*/
     0,                          /*tp_base*/
     0,                          /*tp_dict*/
@@ -895,11 +900,9 @@ PyObject_ClearWeakRefs(PyObject *object)
     if (*list != NULL) {
         PyWeakReference *current = *list;
         Py_ssize_t count = _PyWeakref_GetWeakrefCount(current);
-        int restore_error = PyErr_Occurred() ? 1 : 0;
         PyObject *err_type, *err_value, *err_tb;
 
-        if (restore_error)
-            PyErr_Fetch(&err_type, &err_value, &err_tb);
+        PyErr_Fetch(&err_type, &err_value, &err_tb);
         if (count == 1) {
             PyObject *callback = current->wr_callback;
 
@@ -917,8 +920,7 @@ PyObject_ClearWeakRefs(PyObject *object)
 
             tuple = PyTuple_New(count * 2);
             if (tuple == NULL) {
-                if (restore_error)
-                    PyErr_Fetch(&err_type, &err_value, &err_tb);
+                _PyErr_ChainExceptions(err_type, err_value, err_tb);
                 return;
             }
 
@@ -949,7 +951,7 @@ PyObject_ClearWeakRefs(PyObject *object)
             }
             Py_DECREF(tuple);
         }
-        if (restore_error)
-            PyErr_Restore(err_type, err_value, err_tb);
+        assert(!PyErr_Occurred());
+        PyErr_Restore(err_type, err_value, err_tb);
     }
 }