31 files changed, 529 insertions, 364 deletions
diff --git a/Doc/library/enum.rst b/Doc/library/enum.rst
index 3661469..236275d 100644
--- a/Doc/library/enum.rst
+++ b/Doc/library/enum.rst
@@ -38,7 +38,8 @@ follows::
     ...     blue = 3
     ...
 
-..note: Nomenclature
+.. note:: Nomenclature
+
   - The class :class:`Color` is an *enumeration* (or *enum*)
   - The attributes :attr:`Color.red`, :attr:`Color.green`, etc., are
     *enumeration members* (or *enum members*).
@@ -474,7 +475,7 @@ Some rules:
 4. %-style formatting:  `%s` and `%r` call :class:`Enum`'s :meth:`__str__` and
    :meth:`__repr__` respectively; other codes (such as `%i` or `%h` for
    IntEnum) treat the enum member as its mixed-in type.
-5. :class:`str`.:meth:`__format__` (or :func:`format`) will use the mixed-in
+5. :meth:`str.__format__` (or :func:`format`) will use the mixed-in
    type's :meth:`__format__`.  If the :class:`Enum`'s :func:`str` or
    :func:`repr` is desired use the `!s` or `!r` :class:`str` format codes.
 
diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst
index 0ee93ed..25f34bf 100644
--- a/Doc/library/itertools.rst
+++ b/Doc/library/itertools.rst
@@ -48,6 +48,7 @@ Iterator                Arguments                       Results
 ====================    ============================    =================================================   =============================================================
 :func:`accumulate`      p [,func]                       p0, p0+p1, p0+p1+p2, ...                            ``accumulate([1,2,3,4,5]) --> 1 3 6 10 15``
 :func:`chain`           p, q, ...                       p0, p1, ... plast, q0, q1, ...                      ``chain('ABC', 'DEF') --> A B C D E F``
+chain.from_iterable     iterable                        p0, p1, ... plast, q0, q1, ...                      ``chain.from_iterable(['ABC', 'DEF']) --> A B C D E F``
 :func:`compress`        data, selectors                 (d[0] if s[0]), (d[1] if s[1]), ...                 ``compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F``
 :func:`dropwhile`       pred, seq                       seq[n], seq[n+1], starting when pred fails          ``dropwhile(lambda x: x<5, [1,4,6,4,1]) --> 6 4 1``
 :func:`filterfalse`     pred, seq                       elements of seq where pred(elem) is False           ``filterfalse(lambda x: x%2, range(10)) --> 0 2 4 6 8``
@@ -156,9 +157,8 @@ loops that truncate the stream.
 .. classmethod:: chain.from_iterable(iterable)
 
    Alternate constructor for :func:`chain`.  Gets chained inputs from a
-   single iterable argument that is evaluated lazily.  Equivalent to::
+   single iterable argument that is evaluated lazily.  Roughly equivalent to::
 
-      @classmethod
       def from_iterable(iterables):
           # chain.from_iterable(['ABC', 'DEF']) --> A B C D E F
           for it in iterables:
diff --git a/Doc/library/os.rst b/Doc/library/os.rst
index 848fd16..fc909f2 100644
--- a/Doc/library/os.rst
+++ b/Doc/library/os.rst
@@ -757,8 +757,6 @@ as internal buffering of data.
 
    As of Python 3.3, this is equivalent to ``os.pathconf(fd, name)``.
 
-   Availability: Unix.
-
 
 .. function:: fstat(fd)
 
diff --git a/Doc/library/profile.rst b/Doc/library/profile.rst
index 3f2a02d..aefc024 100644
--- a/Doc/library/profile.rst
+++ b/Doc/library/profile.rst
@@ -247,11 +247,13 @@ functions:
       import cProfile, pstats, io
       pr = cProfile.Profile()
       pr.enable()
-      ... do something ...
+      # ... do something ...
       pr.disable()
       s = io.StringIO()
-      ps = pstats.Stats(pr, stream=s)
-      ps.print_results()
+      sortby = 'cumulative'
+      ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
+      ps.print_stats()
+      print(s.getvalue())
 
    .. method:: enable()
 
diff --git a/Doc/library/test.rst b/Doc/library/test.rst
index bce0f64..c1270f4 100644
--- a/Doc/library/test.rst
+++ b/Doc/library/test.rst
@@ -263,12 +263,15 @@ The :mod:`test.support` module defines the following functions:
    Used when tests are executed by :mod:`test.regrtest`.
 
 
-.. function:: findfile(filename)
+.. function:: findfile(filename, subdir=None)
 
    Return the path to the file named *filename*. If no match is found
    *filename* is returned. This does not equal a failure since it could be the
    path to the file.
 
+   Setting *subdir* indicates a relative path to use to find the file
+   rather than looking directly in the path directories.
+
 
 .. function:: run_unittest(\*classes)
 
diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst
index 412bee7..9071227 100644
--- a/Doc/library/unittest.rst
+++ b/Doc/library/unittest.rst
@@ -1674,8 +1674,7 @@ Loading and running tests
 
       A list containing 2-tuples of :class:`TestCase` instances and strings
       holding formatted tracebacks. Each tuple represents a test where a failure
-      was explicitly signalled using the :meth:`TestCase.fail\*` or
-      :meth:`TestCase.assert\*` methods.
+      was explicitly signalled using the :meth:`TestCase.assert\*` methods.
 
    .. attribute:: skipped
 
@@ -1772,7 +1771,7 @@ Loading and running tests
 
    .. method:: addError(test, err)
 
-      Called when the test case *test* raises an unexpected exception *err* is a
+      Called when the test case *test* raises an unexpected exception. *err* is a
       tuple of the form returned by :func:`sys.exc_info`: ``(type, value,
       traceback)``.
 
diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
index c14f6c7..908a17c 100644
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -511,9 +511,9 @@ conflict.
 
 .. envvar:: PYTHONDONTWRITEBYTECODE
 
-   If this is set, Python won't try to write ``.pyc`` or ``.pyo`` files on the
-   import of source modules.  This is equivalent to specifying the :option:`-B`
-   option.
+   If this is set to a non-empty string, Python won't try to write ``.pyc`` or
+   ``.pyo`` files on the import of source modules.  This is equivalent to
+   specifying the :option:`-B` option.
 
 
 .. envvar:: PYTHONHASHSEED
@@ -582,11 +582,11 @@ conflict.
 
 .. envvar:: PYTHONFAULTHANDLER
 
-   If this environment variable is set, :func:`faulthandler.enable` is called
-   at startup: install a handler for :const:`SIGSEGV`, :const:`SIGFPE`,
-   :const:`SIGABRT`, :const:`SIGBUS` and :const:`SIGILL` signals to dump the
-   Python traceback.  This is equivalent to :option:`-X` ``faulthandler``
-   option.
+   If this environment variable is set to a non-empty string,
+   :func:`faulthandler.enable` is called at startup: install a handler for
+   :const:`SIGSEGV`, :const:`SIGFPE`, :const:`SIGABRT`, :const:`SIGBUS` and
+   :const:`SIGILL` signals to dump the Python traceback.  This is equivalent to
+   the :option:`-X` ``faulthandler`` option.
 
    .. versionadded:: 3.3
 
diff --git a/Include/pystate.h b/Include/pystate.h
index e41fe4c..ddc6892 100644
--- a/Include/pystate.h
+++ b/Include/pystate.h
@@ -118,6 +118,32 @@ typedef struct _ts {
     int trash_delete_nesting;
     PyObject *trash_delete_later;
 
+    /* Called when a thread state is deleted normally, but not when it
+     * is destroyed after fork().
+     * Pain:  to prevent rare but fatal shutdown errors (issue 18808),
+     * Thread.join() must wait for the join'ed thread's tstate to be unlinked
+     * from the tstate chain.  That happens at the end of a thread's life,
+     * in pystate.c.
+     * The obvious way doesn't quite work:  create a lock which the tstate
+     * unlinking code releases, and have Thread.join() wait to acquire that
+     * lock.  The problem is that we _are_ at the end of the thread's life:
+     * if the thread holds the last reference to the lock, decref'ing the
+     * lock will delete the lock, and that may trigger arbitrary Python code
+     * if there's a weakref, with a callback, to the lock.  But by this time
+     * _PyThreadState_Current is already NULL, so only the simplest of C code
+     * can be allowed to run (in particular it must not be possible to
+     * release the GIL).
+     * So instead of holding the lock directly, the tstate holds a weakref to
+     * the lock:  that's the value of on_delete_data below.  Decref'ing a
+     * weakref is harmless.
+     * on_delete points to _threadmodule.c's static release_sentinel() function.
+     * After the tstate is unlinked, release_sentinel is called with the
+     * weakref-to-lock (on_delete_data) argument, and release_sentinel releases
+     * the indirectly held lock.
+     */
+    void (*on_delete)(void *);
+    void *on_delete_data;
+
     /* XXX signal handlers should also be here */
 
 } PyThreadState;
diff --git a/Include/setobject.h b/Include/setobject.h
index f377a73..ae3f556 100644
--- a/Include/setobject.h
+++ b/Include/setobject.h
@@ -105,7 +105,6 @@ PyAPI_FUNC(PyObject *) PySet_Pop(PyObject *set);
 PyAPI_FUNC(int) _PySet_Update(PyObject *set, PyObject *iterable);
 
 PyAPI_FUNC(int) PySet_ClearFreeList(void);
-PyAPI_FUNC(void) _PySet_DebugMallocStats(FILE *out);
 #endif
 
 #ifdef __cplusplus
diff --git a/Lib/_dummy_thread.py b/Lib/_dummy_thread.py
index 13b1f26..b67cfb9 100644
--- a/Lib/_dummy_thread.py
+++ b/Lib/_dummy_thread.py
@@ -81,6 +81,10 @@ def stack_size(size=None):
         raise error("setting thread stack size not supported")
     return 0
 
+def _set_sentinel():
+    """Dummy implementation of _thread._set_sentinel()."""
+    return LockType()
+
 class LockType(object):
     """Class implementing dummy implementation of _thread.LockType.
 
diff --git a/Lib/decimal.py b/Lib/decimal.py
index d39ac1d..fc95ae9 100644
--- a/Lib/decimal.py
+++ b/Lib/decimal.py
@@ -21,7 +21,7 @@ the General Decimal Arithmetic Specification:
 
 and IEEE standard 854-1987:
 
-    www.cs.berkeley.edu/~ejr/projects/754/private/drafts/854-1987/dir.html
+    http://en.wikipedia.org/wiki/IEEE_854-1987
 
 Decimal floating point has finite precision with arbitrarily large bounds.
 
diff --git a/Lib/multiprocessing/connection.py b/Lib/multiprocessing/connection.py
index 59fb664..27fda9f 100644
--- a/Lib/multiprocessing/connection.py
+++ b/Lib/multiprocessing/connection.py
@@ -878,13 +878,21 @@ else:
 
     import selectors
 
+    # poll/select have the advantage of not requiring any extra file
+    # descriptor, unlike epoll/kqueue (also, they require a single
+    # syscall).
+    if hasattr(selectors, 'PollSelector'):
+        _WaitSelector = selectors.PollSelector
+    else:
+        _WaitSelector = selectors.SelectSelector
+
     def wait(object_list, timeout=None):
         '''
         Wait till an object in object_list is ready/readable.
 
         Returns list of those objects in object_list which are ready/readable.
         '''
-        with selectors.DefaultSelector() as selector:
+        with _WaitSelector() as selector:
             for obj in object_list:
                 selector.register(obj, selectors.EVENT_READ)
 
diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py
index 5832ef6..5687ef9 100644
--- a/Lib/test/support/__init__.py
+++ b/Lib/test/support/__init__.py
@@ -860,24 +860,31 @@ if hasattr(os, "umask"):
         finally:
             os.umask(oldmask)
 
-# TEST_HOME refers to the top level directory of the "test" package
+# TEST_HOME_DIR refers to the top level directory of the "test" package
 # that contains Python's regression test suite
-TEST_HOME = os.path.dirname(os.path.abspath(__file__))
+TEST_SUPPORT_DIR = os.path.dirname(os.path.abspath(__file__))
+TEST_HOME_DIR = os.path.dirname(TEST_SUPPORT_DIR)
 
-def findfile(file, here=TEST_HOME, subdir=None):
+# TEST_DATA_DIR is used as a target download location for remote resources
+TEST_DATA_DIR = os.path.join(TEST_HOME_DIR, "data")
+
+def findfile(filename, subdir=None):
     """Try to find a file on sys.path or in the test directory.  If it is not
     found the argument passed to the function is returned (this does not
-    necessarily signal failure; could still be the legitimate path)."""
-    if os.path.isabs(file):
-        return file
+    necessarily signal failure; could still be the legitimate path).
+
+    Setting *subdir* indicates a relative path to use to find the file
+    rather than looking directly in the path directories.
+    """
+    if os.path.isabs(filename):
+        return filename
     if subdir is not None:
-        file = os.path.join(subdir, file)
-    path = sys.path
-    path = [os.path.dirname(here)] + path
+        filename = os.path.join(subdir, filename)
+    path = [TEST_HOME_DIR] + sys.path
     for dn in path:
-        fn = os.path.join(dn, file)
+        fn = os.path.join(dn, filename)
         if os.path.exists(fn): return fn
-    return file
+    return filename
 
 def create_empty_file(filename):
     """Create an empty file. If the file already exists, truncate it."""
@@ -914,7 +921,7 @@ def open_urlresource(url, *args, **kw):
 
     filename = urllib.parse.urlparse(url)[2].split('/')[-1] # '/': it's URL!
 
-    fn = os.path.join(os.path.dirname(__file__), "data", filename)
+    fn = os.path.join(TEST_DATA_DIR, filename)
 
     def check_valid_file(fn):
         f = open(fn, *args, **kw)
diff --git a/Lib/test/test_faulthandler.py b/Lib/test/test_faulthandler.py
index 4a8becf..d78bcb0 100644
--- a/Lib/test/test_faulthandler.py
+++ b/Lib/test/test_faulthandler.py
@@ -265,17 +265,33 @@ faulthandler._sigsegv()
         # By default, the module should be disabled
         code = "import faulthandler; print(faulthandler.is_enabled())"
         args = (sys.executable, '-E', '-c', code)
-        # use subprocess module directly because test.script_helper adds
-        # "-X faulthandler" to the command line
-        stdout = subprocess.check_output(args)
-        self.assertEqual(stdout.rstrip(), b"False")
+        # don't use assert_python_ok() because it always enables faulthandler
+        output = subprocess.check_output(args)
+        self.assertEqual(output.rstrip(), b"False")
 
     def test_sys_xoptions(self):
         # Test python -X faulthandler
         code = "import faulthandler; print(faulthandler.is_enabled())"
-        rc, stdout, stderr = assert_python_ok("-X", "faulthandler", "-c", code)
-        stdout = (stdout + stderr).strip()
-        self.assertEqual(stdout, b"True")
+        args = (sys.executable, "-E", "-X", "faulthandler", "-c", code)
+        # don't use assert_python_ok() because it always enables faulthandler
+        output = subprocess.check_output(args)
+        self.assertEqual(output.rstrip(), b"True")
+
+    def test_env_var(self):
+        # empty env var
+        code = "import faulthandler; print(faulthandler.is_enabled())"
+        args = (sys.executable, "-c", code)
+        env = os.environ.copy()
+        env['PYTHONFAULTHANDLER'] = ''
+        # don't use assert_python_ok() because it always enables faulthandler
+        output = subprocess.check_output(args, env=env)
+        self.assertEqual(output.rstrip(), b"False")
+
+        # non-empty env var
+        env = os.environ.copy()
+        env['PYTHONFAULTHANDLER'] = '1'
+        output = subprocess.check_output(args, env=env)
+        self.assertEqual(output.rstrip(), b"True")
 
     def check_dump_traceback(self, filename):
         """
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
index d0dd364..39b0e80 100644
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -34,6 +34,10 @@ try:
     import resource
 except ImportError:
     resource = None
+try:
+    import fcntl
+except ImportError:
+    fcntl = None
 
 from test.script_helper import assert_python_ok
 
@@ -2300,19 +2304,38 @@ class CPUCountTests(unittest.TestCase):
 
 
 class FDInheritanceTests(unittest.TestCase):
-    def test_get_inheritable(self):
+    def test_get_set_inheritable(self):
         fd = os.open(__file__, os.O_RDONLY)
         self.addCleanup(os.close, fd)
-        for inheritable in (False, True):
-            os.set_inheritable(fd, inheritable)
-            self.assertEqual(os.get_inheritable(fd), inheritable)
+        self.assertEqual(os.get_inheritable(fd), False)
+
+        os.set_inheritable(fd, True)
+        self.assertEqual(os.get_inheritable(fd), True)
 
-    def test_set_inheritable(self):
+    @unittest.skipIf(fcntl is None, "need fcntl")
+    def test_get_inheritable_cloexec(self):
         fd = os.open(__file__, os.O_RDONLY)
         self.addCleanup(os.close, fd)
-        os.set_inheritable(fd, True)
+        self.assertEqual(os.get_inheritable(fd), False)
+
+        # clear FD_CLOEXEC flag
+        flags = fcntl.fcntl(fd, fcntl.F_GETFD)
+        flags &= ~fcntl.FD_CLOEXEC
+        fcntl.fcntl(fd, fcntl.F_SETFD, flags)
+
         self.assertEqual(os.get_inheritable(fd), True)
 
+    @unittest.skipIf(fcntl is None, "need fcntl")
+    def test_set_inheritable_cloexec(self):
+        fd = os.open(__file__, os.O_RDONLY)
+        self.addCleanup(os.close, fd)
+        self.assertEqual(fcntl.fcntl(fd, fcntl.F_GETFD) & fcntl.FD_CLOEXEC,
+                         fcntl.FD_CLOEXEC)
+
+        os.set_inheritable(fd, True)
+        self.assertEqual(fcntl.fcntl(fd, fcntl.F_GETFD) & fcntl.FD_CLOEXEC,
+                         0)
+
     def test_open(self):
         fd = os.open(__file__, os.O_RDONLY)
         self.addCleanup(os.close, fd)
diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py
index 289fb22..353874b 100644
--- a/Lib/test/test_regrtest.py
+++ b/Lib/test/test_regrtest.py
@@ -3,6 +3,7 @@ Tests of regrtest.py.
 """
 
 import argparse
+import faulthandler
 import getopt
 import os.path
 import unittest
@@ -25,6 +26,8 @@ class ParseArgsTestCase(unittest.TestCase):
                     regrtest._parse_args([opt])
                 self.assertIn('Run Python regression tests.', out.getvalue())
 
+    @unittest.skipUnless(hasattr(faulthandler, 'dump_traceback_later'),
+                         "faulthandler.dump_traceback_later() required")
     def test_timeout(self):
         ns = regrtest._parse_args(['--timeout', '4.2'])
         self.assertEqual(ns.timeout, 4.2)
diff --git a/Lib/test/test_selectors.py b/Lib/test/test_selectors.py
index 2657a50..6ce4d8a 100644
--- a/Lib/test/test_selectors.py
+++ b/Lib/test/test_selectors.py
@@ -301,6 +301,7 @@ class BaseSelectorTestCase(unittest.TestCase):
 
 class ScalableSelectorMixIn:
 
+    # see issue #18963 for why it's skipped on older OS X versions
     @support.requires_mac_ver(10, 5)
     @unittest.skipUnless(resource, "Test needs resource module")
     def test_above_fd_setsize(self):
@@ -313,7 +314,7 @@ class ScalableSelectorMixIn:
             self.addCleanup(resource.setrlimit, resource.RLIMIT_NOFILE,
                             (soft, hard))
             NUM_FDS = hard
-        except OSError:
+        except (OSError, ValueError):
             NUM_FDS = soft
 
         # guard for already allocated FDs (stdin, stdout...)
diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py
index 4aff932..34d83f2 100644
--- a/Lib/test/test_site.py
+++ b/Lib/test/test_site.py
@@ -5,6 +5,7 @@ executing have not been removed.
 
 """
 import unittest
+import test.support
 from test.support import run_unittest, TESTFN, EnvironmentVarGuard
 from test.support import captured_stderr
 import builtins
@@ -373,9 +374,10 @@ class ImportSideEffectTests(unittest.TestCase):
         self.assertTrue(hasattr(builtins, "exit"))
 
     def test_setting_copyright(self):
-        # 'copyright' and 'credits' should be in builtins
+        # 'copyright', 'credits', and 'license' should be in builtins
         self.assertTrue(hasattr(builtins, "copyright"))
         self.assertTrue(hasattr(builtins, "credits"))
+        self.assertTrue(hasattr(builtins, "license"))
 
     def test_setting_help(self):
         # 'help' should be set in builtins
@@ -402,5 +404,27 @@ class ImportSideEffectTests(unittest.TestCase):
                 self.fail("sitecustomize not imported automatically")
 
 
+class LicenseURL(unittest.TestCase):
+    """Test accessibility of the license."""
+
+    @unittest.skipUnless(str(license).startswith('See http://'),
+                         'license is available as a file')
+    def test_license_page(self):
+        """urlopen should return the license page"""
+        pat = r'^See (http://www\.python\.org/download/releases/[^/]+/license/)$'
+        mo = re.search(pat, str(license))
+        self.assertIsNotNone(mo, msg='can\'t find appropriate url in license')
+        if mo is not None:
+            url = mo.group(1)
+            with test.support.transient_internet(url):
+                import urllib.request, urllib.error
+                try:
+                    with urllib.request.urlopen(url) as data:
+                        code = data.getcode()
+                except urllib.error.HTTPError as e:
+                    code = e.code
+                self.assertEqual(code, 200, msg=url)
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py
index 6205768..490f776 100644
--- a/Lib/test/test_socket.py
+++ b/Lib/test/test_socket.py
@@ -26,6 +26,10 @@ try:
     import multiprocessing
 except ImportError:
     multiprocessing = False
+try:
+    import fcntl
+except ImportError:
+    fcntl = None
 
 HOST = support.HOST
 MSG = 'Michael Gilfix was here\u1234\r\n'.encode('utf-8') ## test unicode string and carriage return
@@ -4804,6 +4808,33 @@ class InheritanceTest(unittest.TestCase):
             sock.set_inheritable(False)
             self.assertEqual(sock.get_inheritable(), False)
 
+    @unittest.skipIf(fcntl is None, "need fcntl")
+    def test_get_inheritable_cloexec(self):
+        sock = socket.socket()
+        with sock:
+            fd = sock.fileno()
+            self.assertEqual(sock.get_inheritable(), False)
+
+            # clear FD_CLOEXEC flag
+            flags = fcntl.fcntl(fd, fcntl.F_GETFD)
+            flags &= ~fcntl.FD_CLOEXEC
+            fcntl.fcntl(fd, fcntl.F_SETFD, flags)
+
+            self.assertEqual(sock.get_inheritable(), True)
+
+    @unittest.skipIf(fcntl is None, "need fcntl")
+    def test_set_inheritable_cloexec(self):
+        sock = socket.socket()
+        with sock:
+            fd = sock.fileno()
+            self.assertEqual(fcntl.fcntl(fd, fcntl.F_GETFD) & fcntl.FD_CLOEXEC,
+                             fcntl.FD_CLOEXEC)
+
+            sock.set_inheritable(True)
+            self.assertEqual(fcntl.fcntl(fd, fcntl.F_GETFD) & fcntl.FD_CLOEXEC,
+                             0)
+
+
     @unittest.skipUnless(hasattr(socket, "socketpair"),
                          "need socket.socketpair()")
     def test_socketpair(self):
diff --git a/Lib/test/test_tcl.py b/Lib/test/test_tcl.py
index 4e52fd4..cf717d8 100644
--- a/Lib/test/test_tcl.py
+++ b/Lib/test/test_tcl.py
@@ -15,6 +15,14 @@ support.import_fresh_module('tkinter')
 from tkinter import Tcl
 from _tkinter import TclError
 
+tcl_version = _tkinter.TCL_VERSION.split('.')
+try:
+    for i in range(len(tcl_version)):
+        tcl_version[i] = int(tcl_version[i])
+except ValueError:
+    pass
+tcl_version = tuple(tcl_version)
+
 
 class TkinterTest(unittest.TestCase):
 
@@ -200,9 +208,12 @@ class TclTest(unittest.TestCase):
             (('a', 3.4), ('a', 3.4)),
             ((), ()),
             (call('list', 1, '2', (3.4,)), (1, '2', (3.4,))),
-            (call('dict', 'create', 1, '\u20ac', b'\xe2\x82\xac', (3.4,)),
-                    (1, '\u20ac', '\u20ac', (3.4,))),
         ]
+        if tcl_version >= (8, 5):
+            testcases += [
+                (call('dict', 'create', 1, '\u20ac', b'\xe2\x82\xac', (3.4,)),
+                        (1, '\u20ac', '\u20ac', (3.4,))),
+            ]
         for arg, res in testcases:
             self.assertEqual(splitlist(arg), res, msg=arg)
         self.assertRaises(TclError, splitlist, '{')
@@ -234,9 +245,12 @@ class TclTest(unittest.TestCase):
             (('a', (2, 3.4)), ('a', (2, 3.4))),
             ((), ()),
             (call('list', 1, '2', (3.4,)), (1, '2', (3.4,))),
-            (call('dict', 'create', 12, '\u20ac', b'\xe2\x82\xac', (3.4,)),
-                    (12, '\u20ac', '\u20ac', (3.4,))),
         ]
+        if tcl_version >= (8, 5):
+            testcases += [
+                (call('dict', 'create', 12, '\u20ac', b'\xe2\x82\xac', (3.4,)),
+                        (12, '\u20ac', '\u20ac', (3.4,))),
+            ]
         for arg, res in testcases:
             self.assertEqual(split(arg), res, msg=arg)
 
diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py
index 971a635..75ae247 100644
--- a/Lib/test/test_threading.py
+++ b/Lib/test/test_threading.py
@@ -109,7 +109,7 @@ class ThreadTests(BaseTestCase):
         if verbose:
             print('waiting for all tasks to complete')
         for t in threads:
-            t.join(NUMTASKS)
+            t.join()
             self.assertTrue(not t.is_alive())
             self.assertNotEqual(t.ident, 0)
             self.assertFalse(t.ident is None)
@@ -539,6 +539,40 @@ class ThreadTests(BaseTestCase):
         self.assertEqual(err, b"")
         self.assertEqual(data, "Thread-1\nTrue\nTrue\n")
 
+    def test_tstate_lock(self):
+        # Test an implementation detail of Thread objects.
+        started = _thread.allocate_lock()
+        finish = _thread.allocate_lock()
+        started.acquire()
+        finish.acquire()
+        def f():
+            started.release()
+            finish.acquire()
+            time.sleep(0.01)
+        # The tstate lock is None until the thread is started
+        t = threading.Thread(target=f)
+        self.assertIs(t._tstate_lock, None)
+        t.start()
+        started.acquire()
+        self.assertTrue(t.is_alive())
+        # The tstate lock can't be acquired when the thread is running
+        # (or suspended).
+        tstate_lock = t._tstate_lock
+        self.assertFalse(tstate_lock.acquire(timeout=0), False)
+        finish.release()
+        # When the thread ends, the tstate lock can be successfully
+        # acquired.
+        self.assertTrue(tstate_lock.acquire(timeout=5), False)
+        # But is_alive() is still True:  we hold _tstate_lock now, which
+        # prevents is_alive() from knowing the thread's end-of-life C code
+        # is done.
+        self.assertTrue(t.is_alive())
+        # Let is_alive() find out the C code is done.
+        tstate_lock.release()
+        self.assertFalse(t.is_alive())
+        # And verify the thread disposed of _tstate_lock.
+        self.assertTrue(t._tstate_lock is None)
+
 
 class ThreadJoinOnShutdown(BaseTestCase):
 
@@ -613,144 +647,8 @@ class ThreadJoinOnShutdown(BaseTestCase):
             """
         self._run_and_join(script)
 
-    def assertScriptHasOutput(self, script, expected_output):
-        rc, out, err = assert_python_ok("-c", script)
-        data = out.decode().replace('\r', '')
-        self.assertEqual(data, expected_output)
-
-    @unittest.skipUnless(hasattr(os, 'fork'), "needs os.fork()")
-    @unittest.skipIf(sys.platform in platforms_to_skip, "due to known OS bug")
-    def test_4_joining_across_fork_in_worker_thread(self):
-        # There used to be a possible deadlock when forking from a child
-        # thread.  See http://bugs.python.org/issue6643.
-
-        # The script takes the following steps:
-        # - The main thread in the parent process starts a new thread and then
-        #   tries to join it.
-        # - The join operation acquires the Lock inside the thread's _block
-        #   Condition.  (See threading.py:Thread.join().)
-        # - We stub out the acquire method on the condition to force it to wait
-        #   until the child thread forks.  (See LOCK ACQUIRED HERE)
-        # - The child thread forks.  (See LOCK HELD and WORKER THREAD FORKS
-        #   HERE)
-        # - The main thread of the parent process enters Condition.wait(),
-        #   which releases the lock on the child thread.
-        # - The child process returns.  Without the necessary fix, when the
-        #   main thread of the child process (which used to be the child thread
-        #   in the parent process) attempts to exit, it will try to acquire the
-        #   lock in the Thread._block Condition object and hang, because the
-        #   lock was held across the fork.
-
-        script = """if 1:
-            import os, time, threading
-
-            finish_join = False
-            start_fork = False
-
-            def worker():
-                # Wait until this thread's lock is acquired before forking to
-                # create the deadlock.
-                global finish_join
-                while not start_fork:
-                    time.sleep(0.01)
-                # LOCK HELD: Main thread holds lock across this call.
-                childpid = os.fork()
-                finish_join = True
-                if childpid != 0:
-                    # Parent process just waits for child.
-                    os.waitpid(childpid, 0)
-                # Child process should just return.
-
-            w = threading.Thread(target=worker)
-
-            # Stub out the private condition variable's lock acquire method.
-            # This acquires the lock and then waits until the child has forked
-            # before returning, which will release the lock soon after.  If
-            # someone else tries to fix this test case by acquiring this lock
-            # before forking instead of resetting it, the test case will
-            # deadlock when it shouldn't.
-            condition = w._block
-            orig_acquire = condition.acquire
-            call_count_lock = threading.Lock()
-            call_count = 0
-            def my_acquire():
-                global call_count
-                global start_fork
-                orig_acquire()  # LOCK ACQUIRED HERE
-                start_fork = True
-                if call_count == 0:
-                    while not finish_join:
-                        time.sleep(0.01)  # WORKER THREAD FORKS HERE
-                with call_count_lock:
-                    call_count += 1
-            condition.acquire = my_acquire
-
-            w.start()
-            w.join()
-            print('end of main')
-            """
-        self.assertScriptHasOutput(script, "end of main\n")
-
-    @unittest.skipUnless(hasattr(os, 'fork'), "needs os.fork()")
-    @unittest.skipIf(sys.platform in platforms_to_skip, "due to known OS bug")
-    def test_5_clear_waiter_locks_to_avoid_crash(self):
-        # Check that a spawned thread that forks doesn't segfault on certain
-        # platforms, namely OS X.  This used to happen if there was a waiter
-        # lock in the thread's condition variable's waiters list.  Even though
-        # we know the lock will be held across the fork, it is not safe to
-        # release locks held across forks on all platforms, so releasing the
-        # waiter lock caused a segfault on OS X.  Furthermore, since locks on
-        # OS X are (as of this writing) implemented with a mutex + condition
-        # variable instead of a semaphore, while we know that the Python-level
-        # lock will be acquired, we can't know if the internal mutex will be
-        # acquired at the time of the fork.
-
-        script = """if True:
-            import os, time, threading
-
-            start_fork = False
-
-            def worker():
-                # Wait until the main thread has attempted to join this thread
-                # before continuing.
-                while not start_fork:
-                    time.sleep(0.01)
-                childpid = os.fork()
-                if childpid != 0:
-                    # Parent process just waits for child.
-                    (cpid, rc) = os.waitpid(childpid, 0)
-                    assert cpid == childpid
-                    assert rc == 0
-                    print('end of worker thread')
-                else:
-                    # Child process should just return.
-                    pass
-
-            w = threading.Thread(target=worker)
-
-            # Stub out the private condition variable's _release_save method.
-            # This releases the condition's lock and flips the global that
-            # causes the worker to fork.  At this point, the problematic waiter
-            # lock has been acquired once by the waiter and has been put onto
-            # the waiters list.
-            condition = w._block
-            orig_release_save = condition._release_save
-            def my_release_save():
-                global start_fork
-                orig_release_save()
-                # Waiter lock held here, condition lock released.
-                start_fork = True
-            condition._release_save = my_release_save
-
-            w.start()
-            w.join()
-            print('end of main thread')
-            """
-        output = "end of worker thread\nend of main thread\n"
-        self.assertScriptHasOutput(script, output)
-
     @unittest.skipIf(sys.platform in platforms_to_skip, "due to known OS bug")
-    def test_6_daemon_threads(self):
+    def test_4_daemon_threads(self):
         # Check that a daemon thread cannot crash the interpreter on shutdown
         # by manipulating internal structures that are being disposed of in
         # the main thread.
@@ -867,6 +765,38 @@ class SubinterpThreadingTests(BaseTestCase):
         # The thread was joined properly.
         self.assertEqual(os.read(r, 1), b"x")
 
+    def test_threads_join_2(self):
+        # Same as above, but a delay gets introduced after the thread's
+        # Python code returned but before the thread state is deleted.
+        # To achieve this, we register a thread-local object which sleeps
+        # a bit when deallocated.
+        r, w = os.pipe()
+        self.addCleanup(os.close, r)
+        self.addCleanup(os.close, w)
+        code = r"""if 1:
+            import os
+            import threading
+            import time
+
+            class Sleeper:
+                def __del__(self):
+                    time.sleep(0.05)
+
+            tls = threading.local()
+
+            def f():
+                # Sleep a bit so that the thread is still running when
+                # Py_EndInterpreter is called.
+                time.sleep(0.05)
+                tls.x = Sleeper()
+                os.write(%d, b"x")
+            threading.Thread(target=f).start()
+            """ % (w,)
+        ret = _testcapi.run_in_subinterp(code)
+        self.assertEqual(ret, 0)
+        # The thread was joined properly.
+        self.assertEqual(os.read(r, 1), b"x")
+
     def test_daemon_threads_fatal_error(self):
         subinterp_code = r"""if 1:
             import os
diff --git a/Lib/threading.py b/Lib/threading.py
index b6d19d5..1921ee3 100644
--- a/Lib/threading.py
+++ b/Lib/threading.py
@@ -33,6 +33,7 @@ __all__ = ['active_count', 'Condition', 'current_thread', 'enumerate', 'Event',
 # Rename some stuff so "from threading import *" is safe
 _start_new_thread = _thread.start_new_thread
 _allocate_lock = _thread.allocate_lock
+_set_sentinel = _thread._set_sentinel
 get_ident = _thread.get_ident
 ThreadError = _thread.error
 try:
@@ -516,8 +517,6 @@ def _newname(template="Thread-%d"):
 _active_limbo_lock = _allocate_lock()
 _active = {}    # maps thread id to Thread object
 _limbo = {}
-
-# For debug and leak testing
 _dangling = WeakSet()
 
 # Main class for threads
@@ -548,28 +547,33 @@ class Thread:
         else:
             self._daemonic = current_thread().daemon
         self._ident = None
+        self._tstate_lock = None
         self._started = Event()
-        self._stopped = False
-        self._block = Condition(Lock())
+        self._is_stopped = False
         self._initialized = True
         # sys.stderr is not stored in the class like
         # sys.exc_info since it can be changed between instances
         self._stderr = _sys.stderr
+        # For debugging and _after_fork()
         _dangling.add(self)
 
-    def _reset_internal_locks(self):
+    def _reset_internal_locks(self, is_alive):
         # private!  Called by _after_fork() to reset our internal locks as
         # they may be in an invalid state leading to a deadlock or crash.
-        if hasattr(self, '_block'):  # DummyThread deletes _block
-            self._block.__init__()
         self._started._reset_internal_locks()
+        if is_alive:
+            self._set_tstate_lock()
+        else:
+            # The thread isn't alive after fork: it doesn't have a tstate
+            # anymore.
+            self._tstate_lock = None
 
     def __repr__(self):
         assert self._initialized, "Thread.__init__() was not called"
         status = "initial"
         if self._started.is_set():
             status = "started"
-        if self._stopped:
+        if self._is_stopped:
             status = "stopped"
         if self._daemonic:
             status += " daemon"
@@ -625,9 +629,18 @@ class Thread:
     def _set_ident(self):
         self._ident = get_ident()
 
+    def _set_tstate_lock(self):
+        """
+        Set a lock object which will be released by the interpreter when
+        the underlying thread state (see pystate.h) gets deleted.
+        """
+        self._tstate_lock = _set_sentinel()
+        self._tstate_lock.acquire()
+
     def _bootstrap_inner(self):
         try:
             self._set_ident()
+            self._set_tstate_lock()
             self._started.set()
             with _active_limbo_lock:
                 _active[self._ident] = self
@@ -682,7 +695,6 @@ class Thread:
                 pass
         finally:
             with _active_limbo_lock:
-                self._stop()
                 try:
                     # We don't call self._delete() because it also
                     # grabs _active_limbo_lock.
@@ -691,10 +703,8 @@ class Thread:
                     pass
 
     def _stop(self):
-        self._block.acquire()
-        self._stopped = True
-        self._block.notify_all()
-        self._block.release()
+        self._is_stopped = True
+        self._tstate_lock = None
 
     def _delete(self):
         "Remove current thread from the dict of currently running threads."
@@ -738,21 +748,24 @@ class Thread:
             raise RuntimeError("cannot join thread before it is started")
         if self is current_thread():
             raise RuntimeError("cannot join current thread")
-
-        self._block.acquire()
-        try:
-            if timeout is None:
-                while not self._stopped:
-                    self._block.wait()
-            else:
-                deadline = _time() + timeout
-                while not self._stopped:
-                    delay = deadline - _time()
-                    if delay <= 0:
-                        break
-                    self._block.wait(delay)
-        finally:
-            self._block.release()
+        if timeout is None:
+            self._wait_for_tstate_lock()
+        else:
+            self._wait_for_tstate_lock(timeout=timeout)
+
+    def _wait_for_tstate_lock(self, block=True, timeout=-1):
+        # Issue #18808: wait for the thread state to be gone.
+        # At the end of the thread's life, after all knowledge of the thread
+        # is removed from C data structures, C code releases our _tstate_lock.
+        # This method passes its arguments to _tstate_lock.acquire().
+        # If the lock is acquired, the C code is done, and self._stop() is
+        # called.  That sets ._is_stopped to True, and ._tstate_lock to None.
+        lock = self._tstate_lock
+        if lock is None:  # already determined that the C code is done
+            assert self._is_stopped
+        elif lock.acquire(block, timeout):
+            lock.release()
+            self._stop()
 
     @property
     def name(self):
@@ -771,7 +784,10 @@ class Thread:
 
     def is_alive(self):
         assert self._initialized, "Thread.__init__() not called"
-        return self._started.is_set() and not self._stopped
+        if self._is_stopped or not self._started.is_set():
+            return False
+        self._wait_for_tstate_lock(False)
+        return not self._is_stopped
 
     isAlive = is_alive
 
@@ -835,6 +851,7 @@ class _MainThread(Thread):
 
     def __init__(self):
         Thread.__init__(self, name="MainThread", daemon=False)
+        self._set_tstate_lock()
         self._started.set()
         self._set_ident()
         with _active_limbo_lock:
@@ -854,11 +871,6 @@ class _DummyThread(Thread):
     def __init__(self):
         Thread.__init__(self, name=_newname("Dummy-%d"), daemon=True)
 
-        # Thread._block consumes an OS-level locking primitive, which
-        # can never be used by a _DummyThread.  Since a _DummyThread
-        # instance is immortal, that's bad, so release this resource.
-        del self._block
-
         self._started.set()
         self._set_ident()
         with _active_limbo_lock:
@@ -904,6 +916,14 @@ from _thread import stack_size
 _main_thread = _MainThread()
 
 def _shutdown():
+    # Obscure:  other threads may be waiting to join _main_thread.  That's
+    # dubious, but some code does it.  We can't wait for C code to release
+    # the main thread's tstate_lock - that won't happen until the interpreter
+    # is nearly dead.  So we release it here.  Note that just calling _stop()
+    # isn't enough:  other threads may already be waiting on _tstate_lock.
+    assert _main_thread._tstate_lock is not None
+    assert _main_thread._tstate_lock.locked()
+    _main_thread._tstate_lock.release()
     _main_thread._stop()
     t = _pickSomeNonDaemonThread()
     while t:
@@ -949,18 +969,23 @@ def _after_fork():
     current = current_thread()
     _main_thread = current
     with _active_limbo_lock:
-        for thread in _enumerate():
+        # Dangling thread instances must still have their locks reset,
+        # because someone may join() them.
+        threads = set(_enumerate())
+        threads.update(_dangling)
+        for thread in threads:
             # Any lock/condition variable may be currently locked or in an
             # invalid state, so we reinitialize them.
-            thread._reset_internal_locks()
             if thread is current:
                 # There is only one active thread. We reset the ident to
                 # its new value since it can have changed.
+                thread._reset_internal_locks(True)
                 ident = get_ident()
                 thread._ident = ident
                 new_active[ident] = thread
             else:
                 # All the others are already stopped.
+                thread._reset_internal_locks(False)
                 thread._stop()
 
         _limbo.clear()
diff --git a/Lib/unittest/__init__.py b/Lib/unittest/__init__.py
index 201a3f0..a5d50af 100644
--- a/Lib/unittest/__init__.py
+++ b/Lib/unittest/__init__.py
@@ -11,7 +11,7 @@ Simple usage:
 
     import unittest
 
-    class IntegerArithmenticTestCase(unittest.TestCase):
+    class IntegerArithmeticTestCase(unittest.TestCase):
         def testAdd(self):  ## test method names begin 'test*'
             self.assertEqual((1 + 2), 3)
             self.assertEqual(0 + 1, 1)
diff --git a/Misc/NEWS b/Misc/NEWS
index f9d8654..6f7ede3 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -2,9 +2,6 @@
 Python News
 +++++++++++
 
-What's New in Python 3.4.0 Alpha 3?
-===================================
-
 Projected Release date: 2013-09-29
 
 Core and Builtins
@@ -13,6 +10,17 @@ Core and Builtins
 Library
 -------
 
+- The :envvar:`PYTHONFAULTHANDLER` environment variable now only enables the
+  faulthandler module if the variable is non-empty. Same behaviour as other
+  variables like :envvar:`PYTHONDONTWRITEBYTECODE`.
+
+Tests
+-----
+
+- Issue #18952: Fix regression in support data downloads introduced when
+  test.support was converted to a package. Regression noticed by Zachary
+  Ware.
+
 
 What's New in Python 3.4.0 Alpha 2?
 ===================================
@@ -68,6 +76,10 @@ Core and Builtins
 Library
 -------
 
+- Issue #18808: Thread.join() now waits for the underlying thread state to
+  be destroyed before returning.  This prevents unpredictable aborts in
+  Py_EndInterpreter() when some non-daemon threads are still running.
+
 - Issue #18458: Prevent crashes with newer versions of libedit.  Its readline
   emulation has changed from 0-based indexing to 1-based like gnu readline.
 
@@ -75,7 +87,7 @@ Library
   readline activation code in ``site.py``.
 
 - Issue #18672: Fixed format specifiers for Py_ssize_t in debugging output in
-  the _sre moduel.
+  the _sre module.
 
 - Issue #18830: inspect.getclasstree() no more produces duplicated entries even
   when input list contains duplicates.
diff --git a/Modules/_multiprocessing/multiprocessing.c b/Modules/_multiprocessing/multiprocessing.c
index 30cb5eb..1aaf360 100644
--- a/Modules/_multiprocessing/multiprocessing.c
+++ b/Modules/_multiprocessing/multiprocessing.c
@@ -99,13 +99,15 @@ multiprocessing_send(PyObject *self, PyObject *args)
 {
     HANDLE handle;
     Py_buffer buf;
-    int ret;
+    int ret, length;
 
     if (!PyArg_ParseTuple(args, F_HANDLE "y*:send" , &handle, &buf))
         return NULL;
 
+    length = (int)Py_MIN(buf.len, INT_MAX);
+
     Py_BEGIN_ALLOW_THREADS
-    ret = send((SOCKET) handle, buf.buf, buf.len, 0);
+    ret = send((SOCKET) handle, buf.buf, length, 0);
     Py_END_ALLOW_THREADS
 
     PyBuffer_Release(&buf);
diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c
index cbb2901..d83d117 100644
--- a/Modules/_threadmodule.c
+++ b/Modules/_threadmodule.c
@@ -1172,6 +1172,66 @@ yet finished.\n\
 This function is meant for internal and specialized purposes only.\n\
 In most applications `threading.enumerate()` should be used instead.");
 
+static void
+release_sentinel(void *wr)
+{
+    /* Tricky: this function is called when the current thread state
+       is being deleted.  Therefore, only simple C code can safely
+       execute here. */
+    PyObject *obj = PyWeakref_GET_OBJECT(wr);
+    lockobject *lock;
+    if (obj != Py_None) {
+        assert(Py_TYPE(obj) == &Locktype);
+        lock = (lockobject *) obj;
+        if (lock->locked) {
+            PyThread_release_lock(lock->lock_lock);
+            lock->locked = 0;
+        }
+    }
+    /* Deallocating a weakref with a NULL callback only calls
+       PyObject_GC_Del(), which can't call any Python code. */
+    Py_DECREF(wr);
+}
+
+static PyObject *
+thread__set_sentinel(PyObject *self)
+{
+    PyObject *wr;
+    PyThreadState *tstate = PyThreadState_Get();
+    lockobject *lock;
+
+    if (tstate->on_delete_data != NULL) {
+        /* We must support the re-creation of the lock from a
+           fork()ed child. */
+        assert(tstate->on_delete == &release_sentinel);
+        wr = (PyObject *) tstate->on_delete_data;
+        tstate->on_delete = NULL;
+        tstate->on_delete_data = NULL;
+        Py_DECREF(wr);
+    }
+    lock = newlockobject();
+    if (lock == NULL)
+        return NULL;
+    /* The lock is owned by whoever called _set_sentinel(), but the weakref
+       hangs off the thread state. */
+    wr = PyWeakref_NewRef((PyObject *) lock, NULL);
+    if (wr == NULL) {
+        Py_DECREF(lock);
+        return NULL;
+    }
+    tstate->on_delete_data = (void *) wr;
+    tstate->on_delete = &release_sentinel;
+    return (PyObject *) lock;
+}
+
+PyDoc_STRVAR(_set_sentinel_doc,
+"_set_sentinel() -> lock\n\
+\n\
+Set a sentinel lock that will be released when the current thread\n\
+state is finalized (after it is untied from the interpreter).\n\
+\n\
+This is a private API for the threading module.");
+
 static PyObject *
 thread_stack_size(PyObject *self, PyObject *args)
 {
@@ -1247,6 +1307,8 @@ static PyMethodDef thread_methods[] = {
      METH_NOARGS, _count_doc},
     {"stack_size",              (PyCFunction)thread_stack_size,
      METH_VARARGS, stack_size_doc},
+    {"_set_sentinel",           (PyCFunction)thread__set_sentinel,
+     METH_NOARGS, _set_sentinel_doc},
     {NULL,                      NULL}           /* sentinel */
 };
 
diff --git a/Modules/faulthandler.c b/Modules/faulthandler.c
index 172945d..47bc9e8 100644
--- a/Modules/faulthandler.c
+++ b/Modules/faulthandler.c
@@ -1048,8 +1048,11 @@ faulthandler_env_options(void)
 {
     PyObject *xoptions, *key, *module, *res;
     _Py_IDENTIFIER(enable);
+    char *p;
 
-    if (!Py_GETENV("PYTHONFAULTHANDLER")) {
+    if (!((p = Py_GETENV("PYTHONFAULTHANDLER")) && *p != '\0')) {
+        /* PYTHONFAULTHANDLER environment variable is missing
+           or an empty string */
         int has_key;
 
         xoptions = PySys_GetXOptions();
diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c
index 4bc9192..0123181 100644
--- a/Modules/itertoolsmodule.c
+++ b/Modules/itertoolsmodule.c
@@ -4,7 +4,7 @@
 
 /* Itertools module written and maintained
    by Raymond D. Hettinger <python@rcn.com>
-   Copyright (c) 2003 Python Software Foundation.
+   Copyright (c) 2003-2013 Python Software Foundation.
    All rights reserved.
 */
 
@@ -4456,6 +4456,7 @@ repeat(elem [,n]) --> elem, elem, elem, ... endlessly or up to n times\n\
 Iterators terminating on the shortest input sequence:\n\
 accumulate(p[, func]) --> p0, p0+p1, p0+p1+p2\n\
 chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ... \n\
+chain.from_iterable([p, q, ...]) --> p0, p1, ... plast, q0, q1, ... \n\
 compress(data, selectors) --> (d[0] if s[0]), (d[1] if s[1]), ...\n\
 dropwhile(pred, seq) --> seq[n], seq[n+1], starting when pred fails\n\
 groupby(iterable[, keyfunc]) --> sub-iterators grouped by value of keyfunc(v)\n\
diff --git a/Objects/object.c b/Objects/object.c
index 693d8c7..8018c6a 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -1955,7 +1955,6 @@ _PyObject_DebugTypeStats(FILE *out)
     _PyFrame_DebugMallocStats(out);
     _PyList_DebugMallocStats(out);
     _PyMethod_DebugMallocStats(out);
-    _PySet_DebugMallocStats(out);
     _PyTuple_DebugMallocStats(out);
 }
 
diff --git a/Objects/setobject.c b/Objects/setobject.c
index 0aec100..23d624f 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -1,22 +1,36 @@
 
 /* set object implementation
+
    Written and maintained by Raymond D. Hettinger <python@rcn.com>
    Derived from Lib/sets.py and Objects/dictobject.c.
 
    Copyright (c) 2003-2013 Python Software Foundation.
    All rights reserved.
+
+   The basic lookup function used by all operations.
+   This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
+
+   The initial probe index is computed as hash mod the table size.
+   Subsequent probe indices are computed as explained in Objects/dictobject.c.
+
+   To improve cache locality, each probe inspects a series of consecutive
+   nearby entries before moving on to probes elsewhere in memory.  This leaves
+   us with a hybrid of linear probing and open addressing.  The linear probing
+   reduces the cost of hash collisions because consecutive memory accesses
+   tend to be much cheaper than scattered probes.  After LINEAR_PROBES steps,
+   we then use open addressing with the upper bits from the hash value.  This
+   helps break up long chains of collisions.
+
+   All arithmetic on hash should ignore overflow.
+
+   Unlike the dictionary implementation, the lookkey functions can return
+   NULL if the rich comparison returns an error.
 */
 
 #include "Python.h"
 #include "structmember.h"
 #include "stringlib/eq.h"
 
-/* This must be >= 1 */
-#define PERTURB_SHIFT 5
-
-/* This should be >= PySet_MINSIZE - 1 */
-#define LINEAR_PROBES 9
-
 /* Object used as dummy key to fill deleted entries */
 static PyObject _dummy_struct;
 
@@ -25,46 +39,15 @@ static PyObject _dummy_struct;
 /* Exported for the gdb plugin's benefit. */
 PyObject *_PySet_Dummy = dummy;
 
-#define INIT_NONZERO_SET_SLOTS(so) do {                         \
-    (so)->table = (so)->smalltable;                             \
-    (so)->mask = PySet_MINSIZE - 1;                             \
-    (so)->hash = -1;                                            \
-    } while(0)
-
-#define EMPTY_TO_MINSIZE(so) do {                               \
-    memset((so)->smalltable, 0, sizeof((so)->smalltable));      \
-    (so)->used = (so)->fill = 0;                                \
-    INIT_NONZERO_SET_SLOTS(so);                                 \
-    } while(0)
-
-/* Reuse scheme to save calls to malloc, free, and memset */
-#ifndef PySet_MAXFREELIST
-#define PySet_MAXFREELIST 80
-#endif
-static PySetObject *free_list[PySet_MAXFREELIST];
-static int numfree = 0;
-
-
-/*
-The basic lookup function used by all operations.
-This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
-
-The initial probe index is computed as hash mod the table size.
-Subsequent probe indices are computed as explained in Objects/dictobject.c.
 
-To improve cache locality, each probe inspects a series of consecutive
-nearby entries before moving on to probes elsewhere in memory.  This leaves
-us with a hybrid of linear probing and open addressing.  The linear probing
-reduces the cost of hash collisions because consecutive memory accesses
-tend to be much cheaper than scattered probes.  After LINEAR_PROBES steps,
-we then use open addressing with the upper bits from the hash value.  This
-helps break-up long chains of collisions.
+/* ======================================================================== */
+/* ======= Begin logic for probing the hash table ========================= */
 
-All arithmetic on hash should ignore overflow.
+/* This should be >= PySet_MINSIZE - 1 */
+#define LINEAR_PROBES 9
 
-Unlike the dictionary implementation, the lookkey functions can return
-NULL if the rich comparison returns an error.
-*/
+/* This must be >= 1 */
+#define PERTURB_SHIFT 5
 
 static setentry *
 set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
@@ -168,8 +151,8 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash)
     while (1) {
         if (entry->key == key
             || (entry->hash == hash
-            && entry->key != dummy
-            && unicode_eq(entry->key, key)))
+                && entry->key != dummy
+                && unicode_eq(entry->key, key)))
             return entry;
         if (entry->key == dummy && freeslot == NULL)
             freeslot = entry;
@@ -200,38 +183,6 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash)
 }
 
 /*
-Internal routine to insert a new key into the table.
-Used by the public insert routine.
-Eats a reference to key.
-*/
-static int
-set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
-{
-    setentry *entry;
-
-    assert(so->lookup != NULL);
-    entry = so->lookup(so, key, hash);
-    if (entry == NULL)
-        return -1;
-    if (entry->key == NULL) {
-        /* UNUSED */
-        so->fill++;
-        entry->key = key;
-        entry->hash = hash;
-        so->used++;
-    } else if (entry->key == dummy) {
-        /* DUMMY */
-        entry->key = key;
-        entry->hash = hash;
-        so->used++;
-    } else {
-        /* ACTIVE */
-        Py_DECREF(key);
-    }
-    return 0;
-}
-
-/*
 Internal routine used by set_table_resize() to insert an item which is
 known to be absent from the set.  This routine also assumes that
 the set contains no deleted entries.  Besides the performance benefit,
@@ -268,6 +219,42 @@ set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash)
     so->used++;
 }
 
+/* ======== End logic for probing the hash table ========================== */
+/* ======================================================================== */
+
+
+/*
+Internal routine to insert a new key into the table.
+Used by the public insert routine.
+Eats a reference to key.
+*/
+static int
+set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
+{
+    setentry *entry;
+
+    assert(so->lookup != NULL);
+    entry = so->lookup(so, key, hash);
+    if (entry == NULL)
+        return -1;
+    if (entry->key == NULL) {
+        /* UNUSED */
+        so->fill++;
+        entry->key = key;
+        entry->hash = hash;
+        so->used++;
+    } else if (entry->key == dummy) {
+        /* DUMMY */
+        entry->key = key;
+        entry->hash = hash;
+        so->used++;
+    } else {
+        /* ACTIVE */
+        Py_DECREF(key);
+    }
+    return 0;
+}
+
 /*
 Restructure the table by allocating a new table and reinserting all
 keys again.  When entries have been deleted, the new table may
@@ -441,6 +428,17 @@ set_discard_key(PySetObject *so, PyObject *key)
     return DISCARD_FOUND;
 }
 
+static void
+set_empty_to_minsize(PySetObject *so)
+{
+    memset(so->smalltable, 0, sizeof(so->smalltable));
+    so->fill = 0;
+    so->used = 0;
+    so->mask = PySet_MINSIZE - 1;
+    so->table = so->smalltable;
+    so->hash = -1;
+}
+
 static int
 set_clear_internal(PySetObject *so)
 {
@@ -448,14 +446,13 @@ set_clear_internal(PySetObject *so)
     int table_is_malloced;
     Py_ssize_t fill;
     setentry small_copy[PySet_MINSIZE];
-#ifdef Py_DEBUG
-    Py_ssize_t i, n;
-    assert (PyAnySet_Check(so));
 
-    n = so->mask + 1;
-    i = 0;
+#ifdef Py_DEBUG
+    Py_ssize_t i = 0;
+    Py_ssize_t n = so->mask + 1;
 #endif
 
+    assert (PyAnySet_Check(so));
     table = so->table;
     assert(table != NULL);
     table_is_malloced = table != so->smalltable;
@@ -468,7 +465,7 @@ set_clear_internal(PySetObject *so)
      */
     fill = so->fill;
     if (table_is_malloced)
-        EMPTY_TO_MINSIZE(so);
+        set_empty_to_minsize(so);
 
     else if (fill > 0) {
         /* It's a small table with something that needs to be cleared.
@@ -477,7 +474,7 @@ set_clear_internal(PySetObject *so)
          */
         memcpy(small_copy, table, sizeof(small_copy));
         table = small_copy;
-        EMPTY_TO_MINSIZE(so);
+        set_empty_to_minsize(so);
     }
     /* else it's a small table that's already empty */
 
@@ -560,10 +557,7 @@ set_dealloc(PySetObject *so)
     }
     if (so->table != so->smalltable)
         PyMem_DEL(so->table);
-    if (numfree < PySet_MAXFREELIST && PyAnySet_CheckExact(so))
-        free_list[numfree++] = so;
-    else
-        Py_TYPE(so)->tp_free(so);
+    Py_TYPE(so)->tp_free(so);
     Py_TRASHCAN_SAFE_END(so)
 }
 
@@ -1018,24 +1012,16 @@ make_new_set(PyTypeObject *type, PyObject *iterable)
     PySetObject *so = NULL;
 
     /* create PySetObject structure */
-    if (numfree &&
-        (type == &PySet_Type  ||  type == &PyFrozenSet_Type)) {
-        so = free_list[--numfree];
-        assert (so != NULL && PyAnySet_CheckExact(so));
-        Py_TYPE(so) = type;
-        _Py_NewReference((PyObject *)so);
-        EMPTY_TO_MINSIZE(so);
-        PyObject_GC_Track(so);
-    } else {
-        so = (PySetObject *)type->tp_alloc(type, 0);
-        if (so == NULL)
-            return NULL;
-        /* tp_alloc has already zeroed the structure */
-        assert(so->table == NULL && so->fill == 0 && so->used == 0);
-        INIT_NONZERO_SET_SLOTS(so);
-    }
+    so = (PySetObject *)type->tp_alloc(type, 0);
+    if (so == NULL)
+        return NULL;
 
+    so->fill = 0;
+    so->used = 0;
+    so->mask = PySet_MINSIZE - 1;
+    so->table = so->smalltable;
     so->lookup = set_lookkey_unicode;
+    so->hash = -1;
     so->weakreflist = NULL;
 
     if (iterable != NULL) {
@@ -1098,34 +1084,15 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 int
 PySet_ClearFreeList(void)
 {
-    int freelist_size = numfree;
-    PySetObject *so;
-
-    while (numfree) {
-        numfree--;
-        so = free_list[numfree];
-        PyObject_GC_Del(so);
-    }
-    return freelist_size;
+    return 0;
 }
 
 void
 PySet_Fini(void)
 {
-    PySet_ClearFreeList();
     Py_CLEAR(emptyfrozenset);
 }
 
-/* Print summary info about the state of the optimized allocator */
-void
-_PySet_DebugMallocStats(FILE *out)
-{
-    _PyDebugAllocatorStats(out,
-                           "free PySetObject",
-                           numfree, sizeof(PySetObject));
-}
-
-
 static PyObject *
 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
@@ -2398,7 +2365,7 @@ test_c_api(PySetObject *so)
     Py_ssize_t count;
     char *s;
     Py_ssize_t i;
-    PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x;
+    PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL;
     PyObject *ob = (PyObject *)so;
     Py_hash_t hash;
     PyObject *str;
diff --git a/Python/pystate.c b/Python/pystate.c
index 924b6a2..ecd00ce 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -208,6 +208,8 @@ new_threadstate(PyInterpreterState *interp, int init)
 
         tstate->trash_delete_nesting = 0;
         tstate->trash_delete_later = NULL;
+        tstate->on_delete = NULL;
+        tstate->on_delete_data = NULL;
 
         if (init)
             _PyThreadState_Init(tstate);
@@ -390,6 +392,9 @@ tstate_delete_common(PyThreadState *tstate)
     if (tstate->next)
         tstate->next->prev = tstate->prev;
     HEAD_UNLOCK();
+    if (tstate->on_delete != NULL) {
+        tstate->on_delete(tstate->on_delete_data);
+    }
     PyMem_RawFree(tstate);
 }