From 63065d761e6c545216b9621982d16dd459abb1f8 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <solipsis@pitrou.net>
Date: Tue, 15 May 2012 23:48:04 +0200
Subject: Issue #14624: UTF-16 decoding is now 3x to 4x faster on various
 inputs. Patch by Serhiy Storchaka.

---
 Misc/NEWS                  |   3 +
 Objects/stringlib/codecs.h | 149 +++++++++++++++++++++++-
 Objects/unicodeobject.c    | 277 +++++++++++++--------------------------------
 3 files changed, 230 insertions(+), 199 deletions(-)

diff --git a/Misc/NEWS b/Misc/NEWS
index 4870979..2ff55e8 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.3.0 Alpha 4?
 Core and Builtins
 -----------------
 
+- Issue #14624: UTF-16 decoding is now 3x to 4x faster on various inputs.
+  Patch by Serhiy Storchaka.
+
 - asdl_seq and asdl_int_seq are now Py_ssize_t sized.
 
 - Issue #14133 (PEP 415): Implement suppression of __context__ display with an
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
index 366011c..07627d6 100644
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -215,7 +215,6 @@ InvalidContinuation:
     goto Return;
 }
 
-#undef LONG_PTR_MASK
 #undef ASCII_CHAR_MASK
 
 
@@ -415,4 +414,152 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
 #undef MAX_SHORT_UNICHARS
 }
 
+/* The pattern for constructing UCS2-repeated masks. */
+#if SIZEOF_LONG == 8
+# define UCS2_REPEAT_MASK 0x0001000100010001ul
+#elif SIZEOF_LONG == 4
+# define UCS2_REPEAT_MASK 0x00010001ul
+#else
+# error C 'long' size should be either 4 or 8!
+#endif
+
+/* The mask for fast checking. */
+#if STRINGLIB_SIZEOF_CHAR == 1
+/* The mask for fast checking of whether a C 'long' contains any
+   non-ASCII or non-Latin1 UTF16-encoded characters. */
+# define FAST_CHAR_MASK         (UCS2_REPEAT_MASK * (0xFFFFu & ~STRINGLIB_MAX_CHAR))
+#else
+/* The mask for fast checking of whether a C 'long' may contain
+   UTF16-encoded surrogate characters. This is an efficient heuristic,
+   assuming that non-surrogate characters with a code point >= 0x8000 are
+   rare in most input.
+*/
+# define FAST_CHAR_MASK         (UCS2_REPEAT_MASK * 0x8000u)
+#endif
+/* The mask for fast byte-swapping. */
+#define STRIPPED_MASK           (UCS2_REPEAT_MASK * 0x00FFu)
+/* Swap bytes. */
+#define SWAB(value)             ((((value) >> 8) & STRIPPED_MASK) | \
+                                 (((value) & STRIPPED_MASK) << 8))
+
+Py_LOCAL_INLINE(Py_UCS4)
+STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
+                        STRINGLIB_CHAR *dest, Py_ssize_t *outpos,
+                        int native_ordering)
+{
+    Py_UCS4 ch;
+    const unsigned char *aligned_end =
+            (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK);
+    const unsigned char *q = *inptr;
+    STRINGLIB_CHAR *p = dest + *outpos;
+    /* Offsets from q for retrieving byte pairs in the right order. */
+#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+    int ihi = !!native_ordering, ilo = !native_ordering;
+#else
+    int ihi = !native_ordering, ilo = !!native_ordering;
+#endif
+    --e;
+
+    while (q < e) {
+        Py_UCS4 ch2;
+        /* First check for possible aligned read of a C 'long'. Unaligned
+           reads are more expensive, better to defer to another iteration. */
+        if (!((size_t) q & LONG_PTR_MASK)) {
+            /* Fast path for runs of in-range non-surrogate chars. */
+            register const unsigned char *_q = q;
+            while (_q < aligned_end) {
+                unsigned long block = * (unsigned long *) _q;
+                if (native_ordering) {
+                    /* Can use buffer directly */
+                    if (block & FAST_CHAR_MASK)
+                        break;
+                }
+                else {
+                    /* Need to byte-swap */
+                    if (block & SWAB(FAST_CHAR_MASK))
+                        break;
+#if STRINGLIB_SIZEOF_CHAR == 1
+                    block >>= 8;
+#else
+                    block = SWAB(block);
+#endif
+                }
+#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+# if SIZEOF_LONG == 4
+                p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+                p[1] = (STRINGLIB_CHAR)(block >> 16);
+# elif SIZEOF_LONG == 8
+                p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+                p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
+                p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
+                p[3] = (STRINGLIB_CHAR)(block >> 48);
+# endif
+#else
+# if SIZEOF_LONG == 4
+                p[0] = (STRINGLIB_CHAR)(block >> 16);
+                p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+# elif SIZEOF_LONG == 8
+                p[0] = (STRINGLIB_CHAR)(block >> 48);
+                p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
+                p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
+                p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+# endif
+#endif
+                _q += SIZEOF_LONG;
+                p += SIZEOF_LONG / 2;
+            }
+            q = _q;
+            if (q >= e)
+                break;
+        }
+
+        ch = (q[ihi] << 8) | q[ilo];
+        q += 2;
+        if (!Py_UNICODE_IS_SURROGATE(ch)) {
+#if STRINGLIB_SIZEOF_CHAR < 2
+            if (ch > STRINGLIB_MAX_CHAR)
+                /* Out-of-range */
+                goto Return;
+#endif
+            *p++ = (STRINGLIB_CHAR)ch;
+            continue;
+        }
+
+        /* UTF-16 code pair: */
+        if (q >= e)
+            goto UnexpectedEnd;
+        if (!Py_UNICODE_IS_HIGH_SURROGATE(ch))
+            goto IllegalEncoding;
+        ch2 = (q[ihi] << 8) | q[ilo];
+        q += 2;
+        if (!Py_UNICODE_IS_LOW_SURROGATE(ch2))
+            goto IllegalSurrogate;
+        ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2);
+#if STRINGLIB_SIZEOF_CHAR < 4
+        /* Out-of-range */
+        goto Return;
+#else
+        *p++ = (STRINGLIB_CHAR)ch;
+#endif
+    }
+    ch = 0;
+Return:
+    *inptr = q;
+    *outpos = p - dest;
+    return ch;
+UnexpectedEnd:
+    ch = 1;
+    goto Return;
+IllegalEncoding:
+    ch = 2;
+    goto Return;
+IllegalSurrogate:
+    ch = 3;
+    goto Return;
+}
+#undef UCS2_REPEAT_MASK
+#undef FAST_CHAR_MASK
+#undef STRIPPED_MASK
+#undef SWAB
+#undef LONG_PTR_MASK
 #endif /* STRINGLIB_IS_UNICODE */
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 2e1e0bd..8fbc203 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5195,25 +5195,6 @@ PyUnicode_DecodeUTF16(const char *s,
     return PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder, NULL);
 }
 
-/* Two masks for fast checking of whether a C 'long' may contain
-   UTF16-encoded surrogate characters. This is an efficient heuristic,
-   assuming that non-surrogate characters with a code point >= 0x8000 are
-   rare in most input.
-   FAST_CHAR_MASK is used when the input is in native byte ordering,
-   SWAPPED_FAST_CHAR_MASK when the input is in byteswapped ordering.
-*/
-#if (SIZEOF_LONG == 8)
-# define FAST_CHAR_MASK         0x8000800080008000L
-# define SWAPPED_FAST_CHAR_MASK 0x0080008000800080L
-# define STRIPPED_MASK          0x00FF00FF00FF00FFL
-#elif (SIZEOF_LONG == 4)
-# define FAST_CHAR_MASK         0x80008000L
-# define SWAPPED_FAST_CHAR_MASK 0x00800080L
-# define STRIPPED_MASK          0x00FF00FFL
-#else
-# error C 'long' size should be either 4 or 8!
-#endif
-
 PyObject *
 PyUnicode_DecodeUTF16Stateful(const char *s,
                               Py_ssize_t size,
@@ -5226,30 +5207,15 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
     Py_ssize_t endinpos;
     Py_ssize_t outpos;
     PyObject *unicode;
-    const unsigned char *q, *e, *aligned_end;
+    const unsigned char *q, *e;
     int bo = 0;       /* assume native ordering by default */
-    int native_ordering = 0;
+    int native_ordering;
     const char *errmsg = "";
-    /* Offsets from q for retrieving byte pairs in the right order. */
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
-    int ihi = 1, ilo = 0;
-#else
-    int ihi = 0, ilo = 1;
-#endif
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
 
-    /* Note: size will always be longer than the resulting Unicode
-       character count */
-    unicode = PyUnicode_New(size, 127);
-    if (!unicode)
-        return NULL;
-    if (size == 0)
-        return unicode;
-    outpos = 0;
-
     q = (unsigned char *)s;
-    e = q + size - 1;
+    e = q + size;
 
     if (byteorder)
         bo = *byteorder;
@@ -5258,155 +5224,98 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
        byte order setting accordingly. In native mode, the leading BOM
        mark is skipped, in all other modes, it is copied to the output
        stream as-is (giving a ZWNBSP character). */
-    if (bo == 0) {
-        if (size >= 2) {
-            const Py_UCS4 bom = (q[ihi] << 8) | q[ilo];
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
-            if (bom == 0xFEFF) {
-                q += 2;
-                bo = -1;
-            }
-            else if (bom == 0xFFFE) {
-                q += 2;
-                bo = 1;
-            }
-#else
-            if (bom == 0xFEFF) {
-                q += 2;
-                bo = 1;
-            }
-            else if (bom == 0xFFFE) {
-                q += 2;
-                bo = -1;
-            }
-#endif
+    if (bo == 0 && size >= 2) {
+        const Py_UCS4 bom = (q[1] << 8) | q[0];
+        if (bom == 0xFEFF) {
+            q += 2;
+            bo = -1;
+        }
+        else if (bom == 0xFFFE) {
+            q += 2;
+            bo = 1;
         }
+        if (byteorder)
+            *byteorder = bo;
     }
 
-    if (bo == -1) {
-        /* force LE */
-        ihi = 1;
-        ilo = 0;
-    }
-    else if (bo == 1) {
-        /* force BE */
-        ihi = 0;
-        ilo = 1;
+    if (q == e) {
+        if (consumed)
+            *consumed = size;
+        Py_INCREF(unicode_empty);
+        return unicode_empty;
     }
+
 #ifdef BYTEORDER_IS_LITTLE_ENDIAN
-    native_ordering = ilo < ihi;
+    native_ordering = bo <= 0;
 #else
-    native_ordering = ilo > ihi;
+    native_ordering = bo >= 0;
 #endif
 
-    aligned_end = (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK);
-    while (q < e) {
-        Py_UCS4 ch;
-        /* First check for possible aligned read of a C 'long'. Unaligned
-           reads are more expensive, better to defer to another iteration. */
-        if (!((size_t) q & LONG_PTR_MASK)) {
-            /* Fast path for runs of non-surrogate chars. */
-            register const unsigned char *_q = q;
+    /* Note: size will always be longer than the resulting Unicode
+       character count */
+    unicode = PyUnicode_New((e - q + 1) / 2, 127);
+    if (!unicode)
+        return NULL;
+
+    outpos = 0;
+    while (1) {
+        Py_UCS4 ch = 0;
+        if (e - q >= 2) {
             int kind = PyUnicode_KIND(unicode);
-            void *data = PyUnicode_DATA(unicode);
-            while (_q < aligned_end) {
-                unsigned long block = * (unsigned long *) _q;
-                Py_UCS4 maxch;
-                if (native_ordering) {
-                    /* Can use buffer directly */
-                    if (block & FAST_CHAR_MASK)
-                        break;
-                }
-                else {
-                    /* Need to byte-swap */
-                    if (block & SWAPPED_FAST_CHAR_MASK)
-                        break;
-                    block = ((block >> 8) & STRIPPED_MASK) |
-                            ((block & STRIPPED_MASK) << 8);
-                }
-                maxch = (Py_UCS2)(block & 0xFFFF);
-#if SIZEOF_LONG == 8
-                ch = (Py_UCS2)((block >> 16) & 0xFFFF);
-                maxch = MAX_MAXCHAR(maxch, ch);
-                ch = (Py_UCS2)((block >> 32) & 0xFFFF);
-                maxch = MAX_MAXCHAR(maxch, ch);
-                ch = (Py_UCS2)(block >> 48);
-                maxch = MAX_MAXCHAR(maxch, ch);
-#else
-                ch = (Py_UCS2)(block >> 16);
-                maxch = MAX_MAXCHAR(maxch, ch);
-#endif
-                if (maxch > PyUnicode_MAX_CHAR_VALUE(unicode)) {
-                    if (unicode_widen(&unicode, outpos, maxch) < 0)
-                        goto onError;
-                    kind = PyUnicode_KIND(unicode);
-                    data = PyUnicode_DATA(unicode);
-                }
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block & 0xFFFF));
-#if SIZEOF_LONG == 8
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 16) & 0xFFFF));
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 32) & 0xFFFF));
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 48)));
-#else
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block >> 16));
-#endif
-#else
-#if SIZEOF_LONG == 8
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 48)));
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 32) & 0xFFFF));
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 16) & 0xFFFF));
-#else
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block >> 16));
-#endif
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block & 0xFFFF));
-#endif
-                _q += SIZEOF_LONG;
+            if (kind == PyUnicode_1BYTE_KIND) {
+                if (PyUnicode_IS_ASCII(unicode))
+                    ch = asciilib_utf16_decode(&q, e,
+                            PyUnicode_1BYTE_DATA(unicode), &outpos,
+                            native_ordering);
+                else
+                    ch = ucs1lib_utf16_decode(&q, e,
+                            PyUnicode_1BYTE_DATA(unicode), &outpos,
+                            native_ordering);
+            } else if (kind == PyUnicode_2BYTE_KIND) {
+                ch = ucs2lib_utf16_decode(&q, e,
+                        PyUnicode_2BYTE_DATA(unicode), &outpos,
+                        native_ordering);
+            } else {
+                assert(kind == PyUnicode_4BYTE_KIND);
+                ch = ucs4lib_utf16_decode(&q, e,
+                        PyUnicode_4BYTE_DATA(unicode), &outpos,
+                        native_ordering);
             }
-            q = _q;
-            if (q >= e)
-                break;
         }
-        ch = (q[ihi] << 8) | q[ilo];
 
-        q += 2;
-
-        if (!Py_UNICODE_IS_SURROGATE(ch)) {
+        switch (ch)
+        {
+        case 0:
+            /* remaining byte at the end? (size should be even) */
+            if (q == e || consumed)
+                goto End;
+            errmsg = "truncated data";
+            startinpos = ((const char *)q) - starts;
+            endinpos = ((const char *)e) - starts;
+            break;
+            /* The remaining input chars are ignored if the callback
+               chooses to skip the input */
+        case 1:
+            errmsg = "unexpected end of data";
+            startinpos = ((const char *)q) - 2 - starts;
+            endinpos = ((const char *)e) - starts;
+            break;
+        case 2:
+            errmsg = "illegal encoding";
+            startinpos = ((const char *)q) - 2 - starts;
+            endinpos = startinpos + 2;
+            break;
+        case 3:
+            errmsg = "illegal UTF-16 surrogate";
+            startinpos = ((const char *)q) - 4 - starts;
+            endinpos = startinpos + 2;
+            break;
+        default:
             if (unicode_putchar(&unicode, &outpos, ch) < 0)
                 goto onError;
             continue;
         }
 
-        /* UTF-16 code pair: */
-        if (q > e) {
-            errmsg = "unexpected end of data";
-            startinpos = (((const char *)q) - 2) - starts;
-            endinpos = ((const char *)e) + 1 - starts;
-            goto utf16Error;
-        }
-        if (Py_UNICODE_IS_HIGH_SURROGATE(ch)) {
-            Py_UCS4 ch2 = (q[ihi] << 8) | q[ilo];
-            q += 2;
-            if (Py_UNICODE_IS_LOW_SURROGATE(ch2)) {
-                if (unicode_putchar(&unicode, &outpos,
-                                    Py_UNICODE_JOIN_SURROGATES(ch, ch2)) < 0)
-                    goto onError;
-                continue;
-            }
-            else {
-                errmsg = "illegal UTF-16 surrogate";
-                startinpos = (((const char *)q)-4)-starts;
-                endinpos = startinpos+2;
-                goto utf16Error;
-            }
-
-        }
-        errmsg = "illegal encoding";
-        startinpos = (((const char *)q)-2)-starts;
-        endinpos = startinpos+2;
-        /* Fall through to report the error */
-
-      utf16Error:
         if (unicode_decode_call_errorhandler(
                 errors,
                 &errorHandler,
@@ -5421,33 +5330,8 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
                 &outpos))
             goto onError;
     }
-    /* remaining byte at the end? (size should be even) */
-    if (e == q) {
-        if (!consumed) {
-            errmsg = "truncated data";
-            startinpos = ((const char *)q) - starts;
-            endinpos = ((const char *)e) + 1 - starts;
-            if (unicode_decode_call_errorhandler(
-                    errors,
-                    &errorHandler,
-                    "utf16", errmsg,
-                    &starts,
-                    (const char **)&e,
-                    &startinpos,
-                    &endinpos,
-                    &exc,
-                    (const char **)&q,
-                    &unicode,
-                    &outpos))
-                goto onError;
-            /* The remaining input chars are ignored if the callback
-               chooses to skip the input */
-        }
-    }
-
-    if (byteorder)
-        *byteorder = bo;
 
+End:
     if (consumed)
         *consumed = (const char *)q-starts;
 
@@ -5466,9 +5350,6 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
     return NULL;
 }
 
-#undef FAST_CHAR_MASK
-#undef SWAPPED_FAST_CHAR_MASK
-
 PyObject *
 _PyUnicode_EncodeUTF16(PyObject *str,
                        const char *errors,
-- 
cgit v0.12


From a790c9b6d6b5b706f1b29be316f2e41d24eeb295 Mon Sep 17 00:00:00 2001
From: "Eric V. Smith" <eric@trueblade.com>
Date: Tue, 15 May 2012 20:44:06 -0400
Subject: Issue #14817: Add rudimentary tests for pkgutil.extend_path.

---
 Lib/test/test_pkgutil.py | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py
index f4e0323..d5d7448 100644
--- a/Lib/test/test_pkgutil.py
+++ b/Lib/test/test_pkgutil.py
@@ -137,8 +137,54 @@ class PkgutilPEP302Tests(unittest.TestCase):
         self.assertEqual(foo.loads, 1)
         del sys.modules['foo']
 
+
+class ExtendPathTests(unittest.TestCase):
+    def create_init(self, pkgname):
+        dirname = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, dirname)
+        sys.path.insert(0, dirname)
+
+        pkgdir = os.path.join(dirname, pkgname)
+        os.mkdir(pkgdir)
+        with open(os.path.join(pkgdir, '__init__.py'), 'w') as fl:
+            fl.write('from pkgutil import extend_path\n__path__ = extend_path(__path__, __name__)\n')
+
+        return dirname
+
+    def create_submodule(self, dirname, pkgname, submodule_name, value):
+        module_name = os.path.join(dirname, pkgname, submodule_name + '.py')
+        with open(module_name, 'w') as fl:
+            print('value={}'.format(value), file=fl)
+
+    def setUp(self):
+        # Create 2 directories on sys.path
+        self.pkgname = 'foo'
+        self.dirname_0 = self.create_init(self.pkgname)
+        self.dirname_1 = self.create_init(self.pkgname)
+
+    def tearDown(self):
+        del sys.path[0]
+        del sys.path[0]
+
+    def test_simple(self):
+        self.create_submodule(self.dirname_0, self.pkgname, 'bar', 0)
+        self.create_submodule(self.dirname_1, self.pkgname, 'baz', 1)
+        import foo.bar
+        import foo.baz
+        # Ensure we read the expected values
+        self.assertEqual(foo.bar.value, 0)
+        self.assertEqual(foo.baz.value, 1)
+
+        # Ensure the path is set up correctly
+        self.assertEqual(sorted(foo.__path__),
+                         sorted([os.path.join(self.dirname_0, self.pkgname),
+                                 os.path.join(self.dirname_1, self.pkgname)]))
+
+    # XXX: test .pkg files
+
+
 def test_main():
-    run_unittest(PkgutilTests, PkgutilPEP302Tests)
+    run_unittest(PkgutilTests, PkgutilPEP302Tests, ExtendPathTests)
     # this is necessary if test is run repeated (like when finding leaks)
     import zipimport
     zipimport._zip_directory_cache.clear()
-- 
cgit v0.12


From 4d377d98a1b4ab115bcd816553600e603e388831 Mon Sep 17 00:00:00 2001
From: Ned Deily <nad@acm.org>
Date: Tue, 15 May 2012 18:08:11 -0700
Subject: Issue #14777: In an X11 windowing environment, tkinter may return
 undecoded UTF-8 bytes as a string when accessing the Tk clipboard. Modify
clipboard_get() to first request type UTF8_STRING when no specific type is
 requested in an X11 windowing environment, falling back to the current
 default type STRING if that fails. Original patch by Thomas Kluyver.

---
 Lib/tkinter/__init__.py | 28 ++++++++++++++++++++++++++--
 Misc/NEWS               |  6 ++++++
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/Lib/tkinter/__init__.py b/Lib/tkinter/__init__.py
index 8af5065..de85bf9 100644
--- a/Lib/tkinter/__init__.py
+++ b/Lib/tkinter/__init__.py
@@ -526,12 +526,19 @@ class Misc:
 
         The type keyword specifies the form in which the data is
         to be returned and should be an atom name such as STRING
-        or FILE_NAME.  Type defaults to STRING.
+        or FILE_NAME.  Type defaults to STRING, except on X11, where the default
+        is to try UTF8_STRING and fall back to STRING.
 
         This command is equivalent to:
 
         selection_get(CLIPBOARD)
         """
+        if 'type' not in kw and self._windowingsystem == 'x11':
+            try:
+                kw['type'] = 'UTF8_STRING'
+                return self.tk.call(('clipboard', 'get') + self._options(kw))
+            except TclError:
+                del kw['type']
         return self.tk.call(('clipboard', 'get') + self._options(kw))
 
     def clipboard_clear(self, **kw):
@@ -613,8 +620,16 @@ class Misc:
         A keyword parameter selection specifies the name of
         the selection and defaults to PRIMARY.  A keyword
         parameter displayof specifies a widget on the display
-        to use."""
+        to use. A keyword parameter type specifies the form of data to be
+        fetched, defaulting to STRING except on X11, where UTF8_STRING is tried
+        before STRING."""
         if 'displayof' not in kw: kw['displayof'] = self._w
+        if 'type' not in kw and self._windowingsystem == 'x11':
+            try:
+                kw['type'] = 'UTF8_STRING'
+                return self.tk.call(('selection', 'get') + self._options(kw))
+            except TclError:
+                del kw['type']
         return self.tk.call(('selection', 'get') + self._options(kw))
     def selection_handle(self, command, **kw):
         """Specify a function COMMAND to call if the X
@@ -1029,6 +1044,15 @@ class Misc:
         if displayof is None:
             return ('-displayof', self._w)
         return ()
+    @property
+    def _windowingsystem(self):
+        """Internal function."""
+        try:
+            return self._root()._windowingsystem_cached
+        except AttributeError:
+            ws = self._root()._windowingsystem_cached = \
+                        self.tk.call('tk', 'windowingsystem')
+            return ws
     def _options(self, cnf, kw = None):
         """Internal function."""
         if kw:
diff --git a/Misc/NEWS b/Misc/NEWS
index f3b14b3..9b9ca67 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -63,6 +63,12 @@ Core and Builtins
 Library
 -------
 
+- Issue #14777: tkinter may return undecoded UTF-8 bytes as a string when
+  accessing the Tk clipboard.  Modify clipboard_get() to first request type
+  UTF8_STRING when no specific type is requested in an X11 windowing
+  environment, falling back to the current default type STRING if that fails.
+  Original patch by Thomas Kluyver.
+
 - Issue #12541: Be lenient with quotes around Realm field of HTTP Basic
   Authentation in urllib2.
 
-- 
cgit v0.12


From ea1badbfef340c63d4f821662c887e46ed62a6ec Mon Sep 17 00:00:00 2001
From: R David Murray <rdmurray@bitdance.com>
Date: Tue, 15 May 2012 22:07:52 -0400
Subject: #1440472: Explain that email parser/generator isn't *quite*
 "idempotent"

---
 Doc/library/email.generator.rst | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/Doc/library/email.generator.rst b/Doc/library/email.generator.rst
index 85b32fe..033dcf1 100644
--- a/Doc/library/email.generator.rst
+++ b/Doc/library/email.generator.rst
@@ -17,7 +17,8 @@ yourself.  However the bundled generator knows how to generate most email in a
 standards-compliant way, should handle MIME and non-MIME email messages just
 fine, and is designed so that the transformation from flat text, to a message
 structure via the :class:`~email.parser.Parser` class, and back to flat text,
-is idempotent (the input is identical to the output).  On the other hand, using
+is idempotent (the input is identical to the output) [#]_.  On the other hand,
+using
 the Generator on a :class:`~email.message.Message` constructed by program may
 result in changes to the :class:`~email.message.Message` object as defaults are
 filled in.
@@ -204,3 +205,12 @@ representing the part.
    The default value for *fmt* is ``None``, meaning ::
 
       [Non-text (%(type)s) part of message omitted, filename %(filename)s]
+
+
+.. rubric:: Footnotes
+
+.. [#] This statement assumes that you use the appropriate setting for the
+       ``unixfrom`` argument, and that you set maxheaderlen=0 (which will
+       preserve whatever the input line lengths were).  It is also not strictly
+       true, since in many cases runs of whitespace in headers are collapsed
+       into single blanks.  The latter is a bug that will eventually be fixed.
-- 
cgit v0.12


From 28e68ea6eafbc7d2bac25ce2be3c046992cc73b6 Mon Sep 17 00:00:00 2001
From: R David Murray <rdmurray@bitdance.com>
Date: Tue, 15 May 2012 22:13:29 -0400
Subject: #1440472: reflow

---
 Doc/library/email.generator.rst | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/Doc/library/email.generator.rst b/Doc/library/email.generator.rst
index 033dcf1..f40869c 100644
--- a/Doc/library/email.generator.rst
+++ b/Doc/library/email.generator.rst
@@ -18,10 +18,9 @@ standards-compliant way, should handle MIME and non-MIME email messages just
 fine, and is designed so that the transformation from flat text, to a message
 structure via the :class:`~email.parser.Parser` class, and back to flat text,
 is idempotent (the input is identical to the output) [#]_.  On the other hand,
-using
-the Generator on a :class:`~email.message.Message` constructed by program may
-result in changes to the :class:`~email.message.Message` object as defaults are
-filled in.
+using the Generator on a :class:`~email.message.Message` constructed by program
+may result in changes to the :class:`~email.message.Message` object as defaults
+are filled in.
 
 :class:`bytes` output can be generated using the :class:`BytesGenerator` class.
 If the message object structure contains non-ASCII bytes, this generator's
-- 
cgit v0.12


From 51b2ed51f0cc18a69a1d53eb9f0f088c99681afc Mon Sep 17 00:00:00 2001
From: Hynek Schlawack <hs@ox.cx>
Date: Wed, 16 May 2012 09:51:07 +0200
Subject: #14809: Add HTTP status codes from RFC 6585 to http.server and
 http.client

Patch by EungJun Yi.
---
 Doc/library/http.client.rst | 15 +++++++++++++++
 Lib/http/client.py          |  8 ++++++++
 Lib/http/server.py          | 10 +++++++++-
 Misc/ACKS                   |  1 +
 Misc/NEWS                   |  3 +++
 5 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/Doc/library/http.client.rst b/Doc/library/http.client.rst
index f0da8ee..ae6c91f 100644
--- a/Doc/library/http.client.rst
+++ b/Doc/library/http.client.rst
@@ -339,6 +339,15 @@ and also the following constants for integer status codes:
 | :const:`UPGRADE_REQUIRED`                | ``426`` | HTTP Upgrade to TLS,                                                  |
 |                                          |         | :rfc:`2817`, Section 6                                                |
 +------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`PRECONDITION_REQUIRED`           | ``428`` | Additional HTTP Status Codes,                                         |
+|                                          |         | :rfc:`6585`, Section 3                                                |
++------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`TOO_MANY_REQUESTS`               | ``429`` | Additional HTTP Status Codes,                                         |
+|                                          |         | :rfc:`6585`, Section 4                                                |
++------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`REQUEST_HEADER_FIELDS_TOO_LARGE` | ``431`` | Additional HTTP Status Codes,                                         |
+|                                          |         | :rfc:`6585`, Section 5                                                |
++------------------------------------------+---------+-----------------------------------------------------------------------+
 | :const:`INTERNAL_SERVER_ERROR`           | ``500`` | HTTP/1.1, `RFC 2616, Section                                          |
 |                                          |         | 10.5.1                                                                |
 |                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.5.1>`_  |
@@ -369,6 +378,12 @@ and also the following constants for integer status codes:
 | :const:`NOT_EXTENDED`                    | ``510`` | An HTTP Extension Framework,                                          |
 |                                          |         | :rfc:`2774`, Section 7                                                |
 +------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`NETWORK_AUTHENTICATION_REQUIRED` | ``511`` | Additional HTTP Status Codes,                                         |
+|                                          |         | :rfc:`6585`, Section 6                                                |
++------------------------------------------+---------+-----------------------------------------------------------------------+
+
+  .. versionchanged:: 3.3
+     Added codes ``428``, ``429``, ``431`` and ``511`` from :rfc:`6585`.
 
 
 .. data:: responses
diff --git a/Lib/http/client.py b/Lib/http/client.py
index 9c110d5..6089192 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -141,6 +141,9 @@ UNPROCESSABLE_ENTITY = 422
 LOCKED = 423
 FAILED_DEPENDENCY = 424
 UPGRADE_REQUIRED = 426
+PRECONDITION_REQUIRED = 428
+TOO_MANY_REQUESTS = 429
+REQUEST_HEADER_FIELDS_TOO_LARGE = 431
 
 # server error
 INTERNAL_SERVER_ERROR = 500
@@ -151,6 +154,7 @@ GATEWAY_TIMEOUT = 504
 HTTP_VERSION_NOT_SUPPORTED = 505
 INSUFFICIENT_STORAGE = 507
 NOT_EXTENDED = 510
+NETWORK_AUTHENTICATION_REQUIRED = 511
 
 # Mapping status codes to official W3C names
 responses = {
@@ -192,6 +196,9 @@ responses = {
     415: 'Unsupported Media Type',
     416: 'Requested Range Not Satisfiable',
     417: 'Expectation Failed',
+    428: 'Precondition Required',
+    429: 'Too Many Requests',
+    431: 'Request Header Fields Too Large',
 
     500: 'Internal Server Error',
     501: 'Not Implemented',
@@ -199,6 +206,7 @@ responses = {
     503: 'Service Unavailable',
     504: 'Gateway Timeout',
     505: 'HTTP Version Not Supported',
+    511: 'Network Authentication Required',
 }
 
 # maximal amount of data to read at one time in _safe_read
diff --git a/Lib/http/server.py b/Lib/http/server.py
index c1b0596..cb66f2b 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -573,7 +573,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
 
     # Table mapping response codes to messages; entries have the
     # form {code: (shortmessage, longmessage)}.
-    # See RFC 2616.
+    # See RFC 2616 and 6585.
     responses = {
         100: ('Continue', 'Request received, please continue'),
         101: ('Switching Protocols',
@@ -628,6 +628,12 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
               'Cannot satisfy request range.'),
         417: ('Expectation Failed',
               'Expect condition could not be satisfied.'),
+        428: ('Precondition Required',
+              'The origin server requires the request to be conditional.'),
+        429: ('Too Many Requests', 'The user has sent too many requests '
+              'in a given amount of time ("rate limiting").'),
+        431: ('Request Header Fields Too Large', 'The server is unwilling to '
+              'process the request because its header fields are too large.'),
 
         500: ('Internal Server Error', 'Server got itself in trouble'),
         501: ('Not Implemented',
@@ -638,6 +644,8 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
         504: ('Gateway Timeout',
               'The gateway server did not receive a timely response'),
         505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
+        511: ('Network Authentication Required',
+              'The client needs to authenticate to gain network access.'),
         }
 
 
diff --git a/Misc/ACKS b/Misc/ACKS
index 2be4bf7..75b544e 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1129,6 +1129,7 @@ Florent Xicluna
 Hirokazu Yamamoto
 Ka-Ping Yee
 Jason Yeo
+EungJun Yi
 Bob Yodlowski
 Danny Yoo
 George Yoshida
diff --git a/Misc/NEWS b/Misc/NEWS
index 418bfc3..d27640d 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -34,6 +34,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #14809: Add HTTP status codes introduced by RFC 6585 to http.server
+  and http.client. Patch by EungJun Yi.
+
 - Issue #14777: tkinter may return undecoded UTF-8 bytes as a string when
   accessing the Tk clipboard.  Modify clipboad_get() to first request type
   UTF8_STRING when no specific type is requested in an X11 windowing
-- 
cgit v0.12


From 5cdc6308b6ac134ab65f2001e37b0c067dadef7d Mon Sep 17 00:00:00 2001
From: "Eric V. Smith" <eric@trueblade.com>
Date: Wed, 16 May 2012 04:48:04 -0400
Subject: Clean up so subsequent tests won't fail. Needs to be moved into a
 support routine (see issue #14715).

---
 Lib/test/test_pkgutil.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py
index d5d7448..6025bcd 100644
--- a/Lib/test/test_pkgutil.py
+++ b/Lib/test/test_pkgutil.py
@@ -165,6 +165,9 @@ class ExtendPathTests(unittest.TestCase):
     def tearDown(self):
         del sys.path[0]
         del sys.path[0]
+        del sys.modules['foo']
+        del sys.modules['foo.bar']
+        del sys.modules['foo.baz']
 
     def test_simple(self):
         self.create_submodule(self.dirname_0, self.pkgname, 'bar', 0)
-- 
cgit v0.12


From adbda1766025ff31ee20565988ca41885c9a3543 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <solipsis@pitrou.net>
Date: Wed, 16 May 2012 11:31:13 +0200
Subject: Update .hgignore for new MSVC files

---
 .hgignore | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.hgignore b/.hgignore
index 1eb60c0..34fd18b 100644
--- a/.hgignore
+++ b/.hgignore
@@ -56,6 +56,9 @@ PC/python_nt*.h
 PC/pythonnt_rc*.h
 PC/*.obj
 PC/*.exe
+PC/*/*.user
+PC/*/*.ncb
+PC/*/*.suo
 PCbuild/*.exe
 PCbuild/*.dll
 PCbuild/*.pdb
@@ -69,6 +72,7 @@ PCbuild/*.suo
 PCbuild/*.*sdf
 PCbuild/Win32-temp-*
 PCbuild/x64-temp-*
+BuildLog.htm
 __pycache__
 Modules/_testembed
 .coverage
-- 
cgit v0.12


From e7672d38dc430036539a2b1a279757d1cc819af7 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <solipsis@pitrou.net>
Date: Wed, 16 May 2012 11:33:08 +0200
Subject: Issue #14732: The _csv module now uses PEP 3121 module
 initialization. Patch by Robin Schreiber.

---
 Misc/ACKS      |   1 +
 Misc/NEWS      |   3 ++
 Modules/_csv.c | 105 ++++++++++++++++++++++++++++++++++++++-------------------
 3 files changed, 74 insertions(+), 35 deletions(-)

diff --git a/Misc/ACKS b/Misc/ACKS
index 75b544e..acf8a34 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -919,6 +919,7 @@ Ralf Schmitt
 Michael Schneider
 Peter Schneider-Kamp
 Arvin Schnell
+Robin Schreiber
 Chad J. Schroeder
 Sam Schulenburg
 Stefan Schwarzer
diff --git a/Misc/NEWS b/Misc/NEWS
index d27640d..a80238a 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -34,6 +34,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #14732: The _csv module now uses PEP 3121 module initialization.
+  Patch by Robin Schreiber.
+
 - Issue #14809: Add HTTP status codes introduced by RFC 6585 to http.server
   and http.client. Patch by EungJun Yi.
 
diff --git a/Modules/_csv.c b/Modules/_csv.c
index 443309c..89ce122 100644
--- a/Modules/_csv.c
+++ b/Modules/_csv.c
@@ -16,9 +16,39 @@ module instead.
 #define IS_BASESTRING(o) \
     PyUnicode_Check(o)
 
-static PyObject *error_obj;     /* CSV exception */
-static PyObject *dialects;      /* Dialect registry */
-static long field_limit = 128 * 1024;   /* max parsed field size */
+typedef struct {
+    PyObject *error_obj;   /* CSV exception */
+    PyObject *dialects;   /* Dialect registry */
+    long field_limit;   /* max parsed field size */
+} _csvstate;
+
+#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
+
+static int
+_csv_clear(PyObject *m)
+{
+    Py_CLEAR(_csvstate(m)->error_obj);
+    Py_CLEAR(_csvstate(m)->dialects);
+    return 0;
+}
+
+static int
+_csv_traverse(PyObject *m, visitproc visit, void *arg)
+{
+    Py_VISIT(_csvstate(m)->error_obj);
+    Py_VISIT(_csvstate(m)->dialects);
+    return 0;
+}
+
+static void
+_csv_free(void *m)
+{
+    _csv_clear((PyObject *)m);   /* m is the module; drop its state refs */
+}
+
+static struct PyModuleDef _csvmodule;
+
+#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
 
 typedef enum {
     START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
@@ -103,10 +133,10 @@ get_dialect_from_registry(PyObject * name_obj)
 {
     PyObject *dialect_obj;
 
-    dialect_obj = PyDict_GetItem(dialects, name_obj);
+    dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
     if (dialect_obj == NULL) {
         if (!PyErr_Occurred())
-            PyErr_Format(error_obj, "unknown dialect");
+            PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
     }
     else
         Py_INCREF(dialect_obj);
@@ -544,9 +574,9 @@ parse_grow_buff(ReaderObj *self)
 static int
 parse_add_char(ReaderObj *self, Py_UCS4 c)
 {
-    if (self->field_len >= field_limit) {
-        PyErr_Format(error_obj, "field larger than field limit (%ld)",
-                     field_limit);
+    if (self->field_len >= _csvstate_global->field_limit) {
+        PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
+                     _csvstate_global->field_limit);
         return -1;
     }
     if (self->field_len == self->field_size && !parse_grow_buff(self))
@@ -703,7 +733,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
         }
         else {
             /* illegal */
-            PyErr_Format(error_obj, "'%c' expected after '%c'",
+            PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
                             dialect->delimiter,
                             dialect->quotechar);
             return -1;
@@ -716,7 +746,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
         else if (c == '\0')
             self->state = START_RECORD;
         else {
-            PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
+            PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
             return -1;
         }
         break;
@@ -755,12 +785,12 @@ Reader_iternext(ReaderObj *self)
         if (lineobj == NULL) {
             /* End of input OR exception */
             if (!PyErr_Occurred() && self->field_len != 0)
-                PyErr_Format(error_obj,
+                PyErr_Format(_csvstate_global->error_obj,
                              "newline inside string");
             return NULL;
         }
         if (!PyUnicode_Check(lineobj)) {
-            PyErr_Format(error_obj,
+            PyErr_Format(_csvstate_global->error_obj,
                          "iterator should return strings, "
                          "not %.200s "
                          "(did you open the file in text mode?)",
@@ -778,7 +808,7 @@ Reader_iternext(ReaderObj *self)
             c = PyUnicode_READ(kind, data, pos);
             if (c == '\0') {
                 Py_DECREF(lineobj);
-                PyErr_Format(error_obj,
+                PyErr_Format(_csvstate_global->error_obj,
                              "line contains NULL byte");
                 goto err;
             }
@@ -994,7 +1024,7 @@ join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
             }
             if (want_escape) {
                 if (!dialect->escapechar) {
-                    PyErr_Format(error_obj,
+                    PyErr_Format(_csvstate_global->error_obj,
                                  "need to escape, but no escapechar set");
                     return -1;
                 }
@@ -1010,7 +1040,7 @@ join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
      */
     if (i == 0 && quote_empty) {
         if (dialect->quoting == QUOTE_NONE) {
-            PyErr_Format(error_obj,
+            PyErr_Format(_csvstate_global->error_obj,
                 "single empty field record must be quoted");
             return -1;
         }
@@ -1127,7 +1157,7 @@ csv_writerow(WriterObj *self, PyObject *seq)
     PyObject *line, *result;
 
     if (!PySequence_Check(seq))
-        return PyErr_Format(error_obj, "sequence expected");
+        return PyErr_Format(_csvstate_global->error_obj, "sequence expected");
 
     len = PySequence_Length(seq);
     if (len < 0)
@@ -1353,7 +1383,7 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
 static PyObject *
 csv_list_dialects(PyObject *module, PyObject *args)
 {
-    return PyDict_Keys(dialects);
+    return PyDict_Keys(_csvstate_global->dialects);
 }
 
 static PyObject *
@@ -1372,7 +1402,7 @@ csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
     dialect = _call_dialect(dialect_obj, kwargs);
     if (dialect == NULL)
         return NULL;
-    if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
+    if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
         Py_DECREF(dialect);
         return NULL;
     }
@@ -1384,8 +1414,8 @@ csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
 static PyObject *
 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
 {
-    if (PyDict_DelItem(dialects, name_obj) < 0)
-        return PyErr_Format(error_obj, "unknown dialect");
+    if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
+        return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
     Py_INCREF(Py_None);
     return Py_None;
 }
@@ -1400,7 +1430,7 @@ static PyObject *
 csv_field_size_limit(PyObject *module, PyObject *args)
 {
     PyObject *new_limit = NULL;
-    long old_limit = field_limit;
+    long old_limit = _csvstate_global->field_limit;
 
     if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
         return NULL;
@@ -1410,9 +1440,9 @@ csv_field_size_limit(PyObject *module, PyObject *args)
                          "limit must be an integer");
             return NULL;
         }
-        field_limit = PyLong_AsLong(new_limit);
-        if (field_limit == -1 && PyErr_Occurred()) {
-            field_limit = old_limit;
+        _csvstate_global->field_limit = PyLong_AsLong(new_limit);
+        if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
+            _csvstate_global->field_limit = old_limit;
             return NULL;
         }
     }
@@ -1551,17 +1581,16 @@ static struct PyMethodDef csv_methods[] = {
     { NULL, NULL }
 };
 
-
 static struct PyModuleDef _csvmodule = {
     PyModuleDef_HEAD_INIT,
     "_csv",
     csv_module_doc,
-    -1,
+    sizeof(_csvstate),
     csv_methods,
     NULL,
-    NULL,
-    NULL,
-    NULL
+    _csv_traverse,
+    _csv_clear,
+    _csv_free
 };
 
 PyMODINIT_FUNC
@@ -1589,11 +1618,16 @@ PyInit__csv(void)
                                    MODULE_VERSION) == -1)
         return NULL;
 
+    /* Set the field limit */
+    _csvstate(module)->field_limit = 128 * 1024;
+    /* XXX Should field_limit also be exposed in the module dict? */
+
     /* Add _dialects dictionary */
-    dialects = PyDict_New();
-    if (dialects == NULL)
+    _csvstate(module)->dialects = PyDict_New();
+    if (_csvstate(module)->dialects == NULL)
         return NULL;
-    if (PyModule_AddObject(module, "_dialects", dialects))
+    Py_INCREF(_csvstate(module)->dialects);
+    if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
         return NULL;
 
     /* Add quote styles into dictionary */
@@ -1609,9 +1643,10 @@ PyInit__csv(void)
         return NULL;
 
     /* Add the CSV exception object to the module. */
-    error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
-    if (error_obj == NULL)
+    _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
+    if (_csvstate(module)->error_obj == NULL)
         return NULL;
-    PyModule_AddObject(module, "Error", error_obj);
+    Py_INCREF(_csvstate(module)->error_obj);
+    PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
     return module;
 }
-- 
cgit v0.12


From 32bc80c523ed31d86d3deda99e0c18d4fc6c2e93 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <solipsis@pitrou.net>
Date: Wed, 16 May 2012 12:51:55 +0200
Subject: Fix build failure.

---
 Objects/exceptions.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index b994862..9e10b7e 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -349,7 +349,8 @@ PyException_SetContext(PyObject *self, PyObject *context) {
 
 static struct PyMemberDef BaseException_members[] = {
     {"__suppress_context__", T_BOOL,
-     offsetof(PyBaseExceptionObject, suppress_context)}
+     offsetof(PyBaseExceptionObject, suppress_context)},
+    {NULL}
 };
 
 
-- 
cgit v0.12


From b84bc7a7ce6649f786b5b3f80e944cb51b332faf Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <solipsis@pitrou.net>
Date: Wed, 16 May 2012 12:58:04 +0200
Subject: Avoid "warning: no newline at end of file" in importlib.h.

---
 Python/freeze_importlib.py | 2 ++
 Python/importlib.h         | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/Python/freeze_importlib.py b/Python/freeze_importlib.py
index c012722..b96e7f2 100644
--- a/Python/freeze_importlib.py
+++ b/Python/freeze_importlib.py
@@ -25,6 +25,8 @@ def main(input_path, output_path):
     with open(output_path, 'w', encoding='utf-8') as output_file:
         output_file.write('\n'.join(lines))
         output_file.write('/* Mercurial binary marker: \x00 */')
+        # Avoid a compiler warning for lack of EOL
+        output_file.write('\n')
 
 
 if __name__ == '__main__':
diff --git a/Python/importlib.h b/Python/importlib.h
index 0beeb59..cf5619a 100644
--- a/Python/importlib.h
+++ b/Python/importlib.h
@@ -3029,4 +3029,4 @@ unsigned char _Py_M__importlib[] = {
     12,17,6,2,12,47,18,25,12,23,12,15,24,30,6,1,
     6,3,12,54,
 };
-/* Mercurial binary marker:   */
\ No newline at end of file
+/* Mercurial binary marker:   */
-- 
cgit v0.12


From a103b96a80f049f68ccf2dd3d5d7858b26a27e94 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <solipsis@pitrou.net>
Date: Wed, 16 May 2012 14:37:54 +0200
Subject: Issue #14829: Fix bisect and range() indexing with large indices (>=
 2 ** 32) under 64-bit Windows.

---
 Misc/NEWS               | 3 +++
 Modules/_bisectmodule.c | 3 ++-
 Objects/rangeobject.c   | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/Misc/NEWS b/Misc/NEWS
index 9b9ca67..08d21f7 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -63,6 +63,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #14829: Fix bisect and range() indexing with large indices
+  (>= 2 ** 32) under 64-bit Windows.
+
 - Issue #14777: tkinter may return undecoded UTF-8 bytes as a string when
   accessing the Tk clipboard.  Modify clipboad_get() to first request type
   UTF8_STRING when no specific type is requested in an X11 windowing
diff --git a/Modules/_bisectmodule.c b/Modules/_bisectmodule.c
index 93d0eed..eae2978 100644
--- a/Modules/_bisectmodule.c
+++ b/Modules/_bisectmodule.c
@@ -3,6 +3,7 @@
 Converted to C by Dmitry Vasiliev (dima at hlabs.spb.ru).
 */
 
+#define PY_SSIZE_T_CLEAN
 #include "Python.h"
 
 static Py_ssize_t
@@ -192,7 +193,7 @@ insort_left(PyObject *self, PyObject *args, PyObject *kw)
         if (PyList_Insert(list, index, item) < 0)
             return NULL;
     } else {
-        result = PyObject_CallMethod(list, "insert", "iO", index, item);
+        result = PyObject_CallMethod(list, "insert", "nO", index, item);
         if (result == NULL)
             return NULL;
         Py_DECREF(result);
diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c
index 58d373c..935b205 100644
--- a/Objects/rangeobject.c
+++ b/Objects/rangeobject.c
@@ -307,7 +307,7 @@ compute_range_item(rangeobject *r, PyObject *arg)
 static PyObject *
 range_item(rangeobject *r, Py_ssize_t i)
 {
-    PyObject *res, *arg = PyLong_FromLong(i);
+    PyObject *res, *arg = PyLong_FromSsize_t(i);
     if (!arg) {
         return NULL;
     }
-- 
cgit v0.12


From b3c16fb040bcc1785486047153b977950c2b8186 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <solipsis@pitrou.net>
Date: Wed, 16 May 2012 14:42:38 +0200
Subject: More .hgignore additions for new VS build files

---
 .hgignore | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.hgignore b/.hgignore
index 34fd18b..4e4c2ce 100644
--- a/.hgignore
+++ b/.hgignore
@@ -32,7 +32,6 @@ Modules/Setup.local
 Modules/config.c
 Modules/ld_so_aix$
 Parser/pgen$
-PCbuild/amd64/
 ^core
 ^python-gdb.py
 ^python.exe-gdb.py
@@ -59,6 +58,9 @@ PC/*.exe
 PC/*/*.user
 PC/*/*.ncb
 PC/*/*.suo
+PC/*/Win32-temp-*
+PC/*/x64-temp-*
+PC/*/amd64
 PCbuild/*.exe
 PCbuild/*.dll
 PCbuild/*.pdb
@@ -72,6 +74,7 @@ PCbuild/*.suo
 PCbuild/*.*sdf
 PCbuild/Win32-temp-*
 PCbuild/x64-temp-*
+PCbuild/amd64
 BuildLog.htm
 __pycache__
 Modules/_testembed
-- 
cgit v0.12


From fb5b954ee0c389b7cba4b07a3411ca6f1795cb94 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <solipsis@pitrou.net>
Date: Wed, 16 May 2012 15:01:40 +0200
Subject: Forward port additional tests from 2.7 (issue #14829).

---
 Lib/test/test_bisect.py | 47 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 44 insertions(+), 3 deletions(-)

diff --git a/Lib/test/test_bisect.py b/Lib/test/test_bisect.py
index c24a1a2..2ac3a68 100644
--- a/Lib/test/test_bisect.py
+++ b/Lib/test/test_bisect.py
@@ -23,6 +23,28 @@ del sys.modules['bisect']
 import bisect as c_bisect
 
 
+class Range(object):
+    """A trivial range()-like object without any integer width limitations."""
+    def __init__(self, start, stop):
+        self.start = start
+        self.stop = stop
+        self.last_insert = None
+
+    def __len__(self):
+        return self.stop - self.start
+
+    def __getitem__(self, idx):
+        n = self.stop - self.start
+        if idx < 0:
+            idx += n
+        if idx >= n:
+            raise IndexError(idx)
+        return self.start + idx
+
+    def insert(self, idx, item):
+        self.last_insert = idx, item
+
+
 class TestBisect(unittest.TestCase):
     module = None
 
@@ -125,9 +147,28 @@ class TestBisect(unittest.TestCase):
     def test_large_range(self):
         # Issue 13496
         mod = self.module
-        data = range(sys.maxsize-1)
-        self.assertEqual(mod.bisect_left(data, sys.maxsize-3), sys.maxsize-3)
-        self.assertEqual(mod.bisect_right(data, sys.maxsize-3), sys.maxsize-2)
+        n = sys.maxsize
+        data = range(n-1)
+        self.assertEqual(mod.bisect_left(data, n-3), n-3)
+        self.assertEqual(mod.bisect_right(data, n-3), n-2)
+        self.assertEqual(mod.bisect_left(data, n-3, n-10, n), n-3)
+        self.assertEqual(mod.bisect_right(data, n-3, n-10, n), n-2)
+
+    def test_large_pyrange(self):
+        # Same as above, but without C-imposed limits on range() parameters
+        mod = self.module
+        n = sys.maxsize
+        data = Range(0, n-1)
+        self.assertEqual(mod.bisect_left(data, n-3), n-3)
+        self.assertEqual(mod.bisect_right(data, n-3), n-2)
+        self.assertEqual(mod.bisect_left(data, n-3, n-10, n), n-3)
+        self.assertEqual(mod.bisect_right(data, n-3, n-10, n), n-2)
+        x = n - 100
+        mod.insort_left(data, x, x - 50, x + 50)
+        self.assertEqual(data.last_insert, (x, x))
+        x = n - 200
+        mod.insort_right(data, x, x - 50, x + 50)
+        self.assertEqual(data.last_insert, (x + 1, x))
 
     def test_random(self, n=25):
         from random import randrange
-- 
cgit v0.12