Diffstat (limited to 'Tools')
-rw-r--r-- | Tools/buildbot/external-common.bat | 5
-rw-r--r-- | Tools/gdb/libpython.py | 95
-rw-r--r-- | Tools/iobench/iobench.py | 19
-rw-r--r-- | Tools/msi/msi.py | 11
-rw-r--r-- | Tools/msi/uuids.py | 60
-rwxr-xr-x | Tools/pybench/pybench.py | 1
-rw-r--r-- | Tools/scripts/README | 3
-rwxr-xr-x | Tools/scripts/findnocoding.py | 4
-rwxr-xr-x | Tools/scripts/patchcheck.py | 30
-rwxr-xr-x | Tools/scripts/pysetup3 | 4
-rwxr-xr-x | Tools/scripts/pysource.py | 2
-rwxr-xr-x | Tools/scripts/reindent.py | 18
-rwxr-xr-x | Tools/scripts/run_tests.py | 48
-rw-r--r-- | Tools/unicode/comparecodecs.py | 2
-rw-r--r-- | Tools/unicode/makeunicodedata.py | 375
-rw-r--r-- | Tools/unittestgui/unittestgui.py | 1
16 files changed, 424 insertions, 254 deletions
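One of the smaller fixes in this set, in Tools/unicode/comparecodecs.py below, widens the comparison loop from range(sys.maxunicode) to range(sys.maxunicode + 1). A minimal sketch (not part of the changeset) of why the extra + 1 matters: range() excludes its stop value, so the old loop never compared the highest code point, U+10FFFF on a wide or PEP 393 build.

    import sys

    last = sys.maxunicode                   # 0x10FFFF on a wide/PEP 393 build
    old_range = range(sys.maxunicode)       # excludes the stop value
    new_range = range(sys.maxunicode + 1)   # includes the last code point

    print(hex(last))             # 0x10ffff
    print(last in old_range)     # False -- the last code point was skipped
    print(last in new_range)     # True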
diff --git a/Tools/buildbot/external-common.bat b/Tools/buildbot/external-common.bat index 1ff282e..244d5f5 100644 --- a/Tools/buildbot/external-common.bat +++ b/Tools/buildbot/external-common.bat @@ -41,3 +41,8 @@ if not exist sqlite-3.7.4 ( rd /s/q sqlite-source-3.6.21 svn export http://svn.python.org/projects/external/sqlite-3.7.4 ) + +@rem lzma +if not exist xz-5.0.3 ( + svn export http://svn.python.org/projects/external/xz-5.0.3 +) diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index 8bbbb10..30347cb 100644 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -49,6 +49,11 @@ import sys _type_char_ptr = gdb.lookup_type('char').pointer() # char* _type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() # unsigned char* _type_void_ptr = gdb.lookup_type('void').pointer() # void* +_type_unsigned_short_ptr = gdb.lookup_type('unsigned short').pointer() +_type_unsigned_int_ptr = gdb.lookup_type('unsigned int').pointer() + +# value computed later, see PyUnicodeObjectPtr.proxy() +_is_pep393 = None SIZEOF_VOID_P = _type_void_ptr.sizeof @@ -322,7 +327,6 @@ class PyObjectPtr(object): name_map = {'bool': PyBoolObjectPtr, 'classobj': PyClassObjectPtr, - 'instance': PyInstanceObjectPtr, 'NoneType': PyNoneStructPtr, 'frame': PyFrameObjectPtr, 'set' : PySetObjectPtr, @@ -396,7 +400,7 @@ class ProxyAlreadyVisited(object): def _write_instance_repr(out, visited, name, pyop_attrdict, address): - '''Shared code for use by old-style and new-style classes: + '''Shared code for use by all classes: write a representation to file-like object "out"''' out.write('<') out.write(name) @@ -479,7 +483,7 @@ class HeapTypeObjectPtr(PyObjectPtr): def proxyval(self, visited): ''' - Support for new-style classes. + Support for classes. Currently we just locate the dictionary using a transliteration to python of _PyObject_GetDictPtr, ignoring descriptors @@ -496,7 +500,7 @@ class HeapTypeObjectPtr(PyObjectPtr): attr_dict = {} tp_name = self.safe_tp_name() - # New-style class: + # Class: return InstanceProxy(tp_name, attr_dict, long(self._gdbval)) def write_repr(self, out, visited): @@ -668,44 +672,6 @@ class PyDictObjectPtr(PyObjectPtr): pyop_value.write_repr(out, visited) out.write('}') -class PyInstanceObjectPtr(PyObjectPtr): - _typename = 'PyInstanceObject' - - def proxyval(self, visited): - # Guard against infinite loops: - if self.as_address() in visited: - return ProxyAlreadyVisited('<...>') - visited.add(self.as_address()) - - # Get name of class: - in_class = self.pyop_field('in_class') - cl_name = in_class.pyop_field('cl_name').proxyval(visited) - - # Get dictionary of instance attributes: - in_dict = self.pyop_field('in_dict').proxyval(visited) - - # Old-style class: - return InstanceProxy(cl_name, in_dict, long(self._gdbval)) - - def write_repr(self, out, visited): - # Guard against infinite loops: - if self.as_address() in visited: - out.write('<...>') - return - visited.add(self.as_address()) - - # Old-style class: - - # Get name of class: - in_class = self.pyop_field('in_class') - cl_name = in_class.pyop_field('cl_name').proxyval(visited) - - # Get dictionary of instance attributes: - pyop_in_dict = self.pyop_field('in_dict') - - _write_instance_repr(out, visited, - cl_name, pyop_in_dict, self.as_address()) - class PyListObjectPtr(PyObjectPtr): _typename = 'PyListObject' @@ -1123,15 +1089,46 @@ class PyUnicodeObjectPtr(PyObjectPtr): return _type_Py_UNICODE.sizeof def proxyval(self, visited): - # From unicodeobject.h: - # Py_ssize_t length; /* Length of raw Unicode data in 
buffer */ - # Py_UNICODE *str; /* Raw Unicode buffer */ - field_length = long(self.field('length')) - field_str = self.field('str') + global _is_pep393 + if _is_pep393 is None: + fields = gdb.lookup_type('PyUnicodeObject').target().fields() + _is_pep393 = 'data' in [f.name for f in fields] + if _is_pep393: + # Python 3.3 and newer + may_have_surrogates = False + compact = self.field('_base') + ascii = compact['_base'] + state = ascii['state'] + is_compact_ascii = (int(state['ascii']) and int(state['compact'])) + if not int(state['ready']): + # string is not ready + field_length = long(compact['wstr_length']) + may_have_surrogates = True + field_str = ascii['wstr'] + else: + field_length = long(ascii['length']) + if is_compact_ascii: + field_str = ascii.address + 1 + elif int(state['compact']): + field_str = compact.address + 1 + else: + field_str = self.field('data')['any'] + repr_kind = int(state['kind']) + if repr_kind == 1: + field_str = field_str.cast(_type_unsigned_char_ptr) + elif repr_kind == 2: + field_str = field_str.cast(_type_unsigned_short_ptr) + elif repr_kind == 4: + field_str = field_str.cast(_type_unsigned_int_ptr) + else: + # Python 3.2 and earlier + field_length = long(self.field('length')) + field_str = self.field('str') + may_have_surrogates = self.char_width() == 2 # Gather a list of ints from the Py_UNICODE array; these are either - # UCS-2 or UCS-4 code points: - if self.char_width() > 2: + # UCS-1, UCS-2 or UCS-4 code points: + if not may_have_surrogates: Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)] else: # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the diff --git a/Tools/iobench/iobench.py b/Tools/iobench/iobench.py index 5ec6f17..408be7b 100644 --- a/Tools/iobench/iobench.py +++ b/Tools/iobench/iobench.py @@ -1,13 +1,14 @@ # -*- coding: utf-8 -*- # This file should be kept compatible with both Python 2.6 and Python >= 3.0. 
-import time +import functools +import hashlib +import itertools import os +import platform import re import sys -import hashlib -import functools -import itertools +import time from optparse import OptionParser out = sys.stdout @@ -307,6 +308,16 @@ def run_all_tests(options): "large": 2, } + print("Python %s" % sys.version) + if sys.version_info < (3, 3): + if sys.maxunicode > 0xffff: + text = "UCS-4 (wide build)" + else: + text = "UTF-16 (narrow build)" + else: + text = "PEP 393" + print("Unicode: %s" % text) + print(platform.platform()) binary_files = list(get_binary_files()) text_files = list(get_text_files()) if "b" in options: diff --git a/Tools/msi/msi.py b/Tools/msi/msi.py index 53e652d..ff490be 100644 --- a/Tools/msi/msi.py +++ b/Tools/msi/msi.py @@ -119,6 +119,7 @@ pythondll_uuid = { "30":"{6953bc3b-6768-4291-8410-7914ce6e2ca8}", "31":"{4afcba0b-13e4-47c3-bebe-477428b46913}", "32":"{3ff95315-1096-4d31-bd86-601d5438ad5e}", + "33":"{f7581ca4-d368-4eea-8f82-d48c64c4f047}", } [major+minor] # Compute the name that Sphinx gives to the docfile @@ -1011,7 +1012,7 @@ def add_files(db): # package READMEs if present lib.glob("README") if dir=='Lib': - lib.add_file('wsgiref.egg-info') + lib.add_file("sysconfig.cfg") if dir=='test' and parent.physical=='Lib': lib.add_file("185test.db") lib.add_file("audiotest.au") @@ -1047,7 +1048,7 @@ def add_files(db): if dir=="Icons": lib.glob("*.gif") lib.add_file("idle.icns") - if dir=="command" and parent.physical=="distutils": + if dir=="command" and parent.physical in ("distutils", "packaging"): lib.glob("wininst*.exe") lib.add_file("command_template") if dir=="lib2to3": @@ -1059,14 +1060,14 @@ def add_files(db): lib.add_file("turtle.cfg") if dir=="pydoc_data": lib.add_file("_pydoc.css") - if dir=="data" and parent.physical=="test" and parent.basedir.physical=="email": + if dir=="data" and parent.physical=="test_email": # This should contain all non-.svn files listed in subversion for f in os.listdir(lib.absolute): if f.endswith(".txt") or f==".svn":continue if f.endswith(".au") or f.endswith(".gif"): lib.add_file(f) else: - print("WARNING: New file %s in email/test/data" % f) + print("WARNING: New file %s in test/test_email/data" % f) for f in os.listdir(lib.absolute): if os.path.isdir(os.path.join(lib.absolute, f)): pydirs.append((lib, f)) @@ -1158,6 +1159,8 @@ def add_files(db): lib.add_file("README.txt", src="README") if f == 'Scripts': lib.add_file("2to3.py", src="2to3") + lib.add_file("pydoc3.py", src="pydoc3") + lib.add_file("pysetup3.py", src="pysetup3") if have_tcl: lib.start_component("pydocgui.pyw", tcltk, keyfile="pydocgui.pyw") lib.add_file("pydocgui.pyw") diff --git a/Tools/msi/uuids.py b/Tools/msi/uuids.py index e06cdfb..b401b29 100644 --- a/Tools/msi/uuids.py +++ b/Tools/msi/uuids.py @@ -7,66 +7,6 @@ # generated. For official releases, we record the product codes, # so people can refer to them. 
product_codes = { - '2.5.101': '{bc14ce3e-5e72-4a64-ac1f-bf59a571898c}', # 2.5a1 - '2.5.102': '{5eed51c1-8e9d-4071-94c5-b40de5d49ba5}', # 2.5a2 - '2.5.103': '{73dcd966-ffec-415f-bb39-8342c1f47017}', # 2.5a3 - '2.5.111': '{c797ecf8-a8e6-4fec-bb99-526b65f28626}', # 2.5b1 - '2.5.112': '{32beb774-f625-439d-b587-7187487baf15}', # 2.5b2 - '2.5.113': '{89f23918-11cf-4f08-be13-b9b2e6463fd9}', # 2.5b3 - '2.5.121': '{8e9321bc-6b24-48a3-8fd4-c95f8e531e5f}', # 2.5c1 - '2.5.122': '{a6cd508d-9599-45da-a441-cbffa9f7e070}', # 2.5c2 - '2.5.150': '{0a2c5854-557e-48c8-835a-3b9f074bdcaa}', # 2.5.0 - '2.5.1121':'{0378b43e-6184-4c2f-be1a-4a367781cd54}', # 2.5.1c1 - '2.5.1150':'{31800004-6386-4999-a519-518f2d78d8f0}', # 2.5.1 - '2.5.2150':'{6304a7da-1132-4e91-a343-a296269eab8a}', # 2.5.2c1 - '2.5.2150':'{6b976adf-8ae8-434e-b282-a06c7f624d2f}', # 2.5.2 - '2.6.101': '{0ba82e1b-52fd-4e03-8610-a6c76238e8a8}', # 2.6a1 - '2.6.102': '{3b27e16c-56db-4570-a2d3-e9a26180c60b}', # 2.6a2 - '2.6.103': '{cd06a9c5-bde5-4bd7-9874-48933997122a}', # 2.6a3 - '2.6.104': '{dc6ed634-474a-4a50-a547-8de4b7491e53}', # 2.6a4 - '2.6.111': '{3f82079a-5bee-4c4a-8a41-8292389e24ae}', # 2.6b1 - '2.6.112': '{8a0e5970-f3e6-4737-9a2b-bc5ff0f15fb5}', # 2.6b2 - '2.6.113': '{df4f5c21-6fcc-4540-95de-85feba634e76}', # 2.6b3 - '2.6.121': '{bbd34464-ddeb-4028-99e5-f16c4a8fbdb3}', # 2.6c1 - '2.6.122': '{8f64787e-a023-4c60-bfee-25d3a3f592c6}', # 2.6c2 - '2.6.150': '{110eb5c4-e995-4cfb-ab80-a5f315bea9e8}', # 2.6.0 - '2.6.1150':'{9cc89170-000b-457d-91f1-53691f85b223}', # 2.6.1 - '2.6.2121':'{adac412b-b209-4c15-b6ab-dca1b6e47144}', # 2.6.2c1 - '2.6.2150':'{24aab420-4e30-4496-9739-3e216f3de6ae}', # 2.6.2 - '2.6.3121':'{a73e0254-dcda-4fe4-bf37-c7e1c4f4ebb6}', # 2.6.3c1 - '2.6.3150':'{3d9ac095-e115-4e94-bdef-7f7edf17697d}', # 2.6.3 - '2.6.4121':'{727de605-0359-4606-a94b-c2033652379b}', # 2.6.4c1 - '2.6.4122':'{4f7603c6-6352-4299-a398-150a31b19acc}', # 2.6.4c2 - '2.6.4150':'{e7394a0f-3f80-45b1-87fc-abcd51893246}', # 2.6.4 - '2.6.5121':'{e0e273d7-7598-4701-8325-c90c069fd5ff}', # 2.6.5c1 - '2.6.5122':'{fa227b76-0671-4dc6-b826-c2ff2a70dfd5}', # 2.6.5c2 - '2.6.5150':'{4723f199-fa64-4233-8e6e-9fccc95a18ee}', # 2.6.5 - '2.7.101': '{eca1bbef-432c-49ae-a667-c213cc7bbf22}', # 2.7a1 - '2.7.102': '{21ce16ed-73c4-460d-9b11-522f417b2090}', # 2.7a2 - '2.7.103': '{6e7dbd55-ba4a-48ac-a688-6c75db4d7500}', # 2.7a3 - '2.7.104': '{ee774ba3-74a5-48d9-b425-b35a287260c8}', # 2.7a4 - '2.7.111': '{9cfd9ec7-a9c7-4980-a1c6-054fc6493eb3}', # 2.7b1 - '2.7.112': '{9a72faf6-c304-4165-8595-9291ff30cac6}', # 2.7b2 - '2.7.121': '{f530c94a-dd53-4de9-948e-b632b9cb48d2}', # 2.7c1 - '2.7.122': '{f80905d2-dd8d-4b8e-8a40-c23c93dca07d}', # 2.7c2 - '2.7.150': '{20c31435-2a0a-4580-be8b-ac06fc243ca4}', # 2.7.0 - '3.0.101': '{8554263a-3242-4857-9359-aa87bc2c58c2}', # 3.0a1 - '3.0.102': '{692d6e2c-f0ac-40b8-a133-7191aeeb67f9}', # 3.0a2 - '3.0.103': '{49cb2995-751a-4753-be7a-d0b1bb585e06}', # 3.0a3 - '3.0.104': '{87cb019e-19fd-4238-b1c7-85751437d646}', # 3.0a4 - '3.0.105': '{cf2659af-19ec-43d2-8c35-0f6a09439d42}', # 3.0a5 - '3.0.111': '{36c26f55-837d-45cf-848c-5f5c0fb47a28}', # 3.0b1 - '3.0.112': '{056a0fbc-c8fe-4c61-aade-c4411b70c998}', # 3.0b2 - '3.0.113': '{2b2e89a9-83af-43f9-b7d5-96e80c5a3f26}', # 3.0b3 - '3.0.114': '{e95c31af-69be-4dd7-96e6-e5fc85e660e6}', # 3.0b4 - '3.0.121': '{d0979c5e-cd3c-42ec-be4c-e294da793573}', # 3.0c1 - '3.0.122': '{f707b8e9-a257-4045-818e-4923fc20fbb6}', # 3.0c2 - '3.0.123': '{5e7208f1-8643-4ea2-ab5e-4644887112e3}', # 3.0c3 - '3.0.150': '{e0e56e21-55de-4f77-a109-1baa72348743}', 
# 3.0.0 - '3.0.1121':'{d35b1ea5-3d70-4872-bf7e-cd066a77a9c9}', # 3.0.1c1 - '3.0.1150':'{de2f2d9c-53e2-40ee-8209-74da63cb060e}', # 3.0.1 - '3.0.2121':'{cef79e7f-9809-49e2-afd2-e24148d7c855}', # 3.0.2c1 - '3.0.2150':'{0cf3b95a-8382-4607-9779-c36407ff362c}', # 3.0.2 '3.1.101': '{c423eada-c498-4d51-9eb4-bfeae647e0a0}', # 3.1a1 '3.1.102': '{f6e199bf-dc64-42f3-87d4-1525991a013e}', # 3.1a2 '3.1.111': '{c3c82893-69b2-4676-8554-1b6ee6c191e9}', # 3.1b1 diff --git a/Tools/pybench/pybench.py b/Tools/pybench/pybench.py index 8eaad63..cc1e55c 100755 --- a/Tools/pybench/pybench.py +++ b/Tools/pybench/pybench.py @@ -107,6 +107,7 @@ def get_machine_details(): print('Getting machine details...') buildno, builddate = platform.python_build() python = platform.python_version() + # XXX this is now always UCS4, maybe replace it with 'PEP393' in 3.3+? if sys.maxunicode == 65535: # UCS2 build (standard) unitype = 'UCS2' diff --git a/Tools/scripts/README b/Tools/scripts/README index 8c02529..eb28a9e 100644 --- a/Tools/scripts/README +++ b/Tools/scripts/README @@ -15,7 +15,7 @@ db2pickle.py Dump a database file to a pickle diff.py Print file diffs in context, unified, or ndiff formats dutree.py Format du(1) output as a tree sorted by size eptags.py Create Emacs TAGS file for Python modules -find_recursionlimit.py Find the maximum recursion limit on this machine +find_recursionlimit.py Find the maximum recursion limit on this machine finddiv.py A grep-like tool that looks for division operators findlinksto.py Recursively find symbolic links to a given path prefix findnocoding.py Find source files which need an encoding declaration @@ -53,6 +53,7 @@ redemo.py Basic regular expression demonstration facility reindent.py Change .py files to use 4-space indents reindent-rst.py Fix-up reStructuredText file whitespace rgrep.py Reverse grep through a file (useful for big logfiles) +run_tests.py Run the test suite with more sensible default options serve.py Small wsgiref-based web server, used in make serve in Doc suff.py Sort a list of files by suffix svneol.py Set svn:eol-style on all files in directory diff --git a/Tools/scripts/findnocoding.py b/Tools/scripts/findnocoding.py index 77607ce..c42fa7c 100755 --- a/Tools/scripts/findnocoding.py +++ b/Tools/scripts/findnocoding.py @@ -2,7 +2,7 @@ """List all those Python files that require a coding directive -Usage: nocoding.py dir1 [dir2...] +Usage: findnocoding.py dir1 [dir2...] 
""" __author__ = "Oleg Broytmann, Georg Brandl" @@ -50,7 +50,7 @@ def has_correct_encoding(text, codec): def needs_declaration(fullpath): try: - infile = open(fullpath, 'rU') + infile = open(fullpath) except IOError: # Oops, the file was removed - ignore it return None diff --git a/Tools/scripts/patchcheck.py b/Tools/scripts/patchcheck.py index dc6b932..3e0155f 100755 --- a/Tools/scripts/patchcheck.py +++ b/Tools/scripts/patchcheck.py @@ -39,27 +39,13 @@ def status(message, modal=False, info=None): @status("Getting the list of files that have been added/changed", info=lambda x: n_files_str(len(x))) def changed_files(): - """Get the list of changed or added files from the VCS.""" - if os.path.isdir(os.path.join(SRCDIR, '.hg')): - vcs = 'hg' - cmd = 'hg status --added --modified --no-status' - elif os.path.isdir('.svn'): - vcs = 'svn' - cmd = 'svn status --quiet --non-interactive --ignore-externals' - else: + """Get the list of changed or added files from Mercurial.""" + if not os.path.isdir(os.path.join(SRCDIR, '.hg')): sys.exit('need a checkout to get modified files') - st = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE) - try: - st.wait() - if vcs == 'hg': - return [x.decode().rstrip() for x in st.stdout] - else: - output = (x.decode().rstrip().rsplit(None, 1)[-1] - for x in st.stdout if x[0] in b'AM') - return set(path for path in output if os.path.isfile(path)) - finally: - st.stdout.close() + cmd = 'hg status --added --modified --no-status' + with subprocess.Popen(cmd.split(), stdout=subprocess.PIPE) as st: + return [x.decode().rstrip() for x in st.stdout] def report_modified_files(file_paths): @@ -77,10 +63,8 @@ def report_modified_files(file_paths): def normalize_whitespace(file_paths): """Make sure that the whitespace for .py files have been normalized.""" reindent.makebackup = False # No need to create backups. - fixed = [] - for path in (x for x in file_paths if x.endswith('.py')): - if reindent.check(os.path.join(SRCDIR, path)): - fixed.append(path) + fixed = [path for path in file_paths if path.endswith('.py') and + reindent.check(os.path.join(SRCDIR, path))] return fixed diff --git a/Tools/scripts/pysetup3 b/Tools/scripts/pysetup3 new file mode 100755 index 0000000..e6a908d --- /dev/null +++ b/Tools/scripts/pysetup3 @@ -0,0 +1,4 @@ +#!/usr/bin/env python3 +import sys +from packaging.run import main +sys.exit(main()) diff --git a/Tools/scripts/pysource.py b/Tools/scripts/pysource.py index 048131e..c7dbe60 100755 --- a/Tools/scripts/pysource.py +++ b/Tools/scripts/pysource.py @@ -42,7 +42,7 @@ def _open(fullpath): return None try: - return open(fullpath, 'rU') + return open(fullpath) except IOError as err: # Access denied, or a special file - ignore it print_debug("%s: access denied: %s" % (fullpath, err)) return None diff --git a/Tools/scripts/reindent.py b/Tools/scripts/reindent.py index b18993b..4a916ea 100755 --- a/Tools/scripts/reindent.py +++ b/Tools/scripts/reindent.py @@ -8,6 +8,8 @@ -r (--recurse) Recurse. Search for all .py files in subdirectories too. -n (--nobackup) No backup. Does not make a ".bak" file before reindenting. -v (--verbose) Verbose. Print informative msgs; else no output. + (--newline) Newline. Specify the newline character to use (CRLF, LF). + Default is the same as the original file. -h (--help) Help. Print this usage information and exit. Change Python (.py) files to use 4-space indents and no hard tab characters. 
@@ -50,6 +52,8 @@ verbose = False recurse = False dryrun = False makebackup = True +spec_newline = None +"""A specified newline to be used in the output (set by --newline option)""" def usage(msg=None): @@ -62,13 +66,12 @@ def errprint(*args): sys.stderr.write(" ".join(str(arg) for arg in args)) sys.stderr.write("\n") - def main(): import getopt - global verbose, recurse, dryrun, makebackup + global verbose, recurse, dryrun, makebackup, spec_newline try: opts, args = getopt.getopt(sys.argv[1:], "drnvh", - ["dryrun", "recurse", "nobackup", "verbose", "help"]) + ["dryrun", "recurse", "nobackup", "verbose", "newline=", "help"]) except getopt.error as msg: usage(msg) return @@ -81,6 +84,11 @@ def main(): makebackup = False elif o in ('-v', '--verbose'): verbose = True + elif o in ('--newline',): + if not a.upper() in ('CRLF', 'LF'): + usage() + return + spec_newline = dict(CRLF='\r\n', LF='\n')[a.upper()] elif o in ('-h', '--help'): usage() return @@ -118,9 +126,9 @@ def check(file): errprint("%s: I/O Error: %s" % (file, str(msg))) return - newline = r.newlines + newline = spec_newline if spec_newline else r.newlines if isinstance(newline, tuple): - errprint("%s: mixed newlines detected; cannot process file" % file) + errprint("%s: mixed newlines detected; cannot continue without --newline" % file) return if r.run(): diff --git a/Tools/scripts/run_tests.py b/Tools/scripts/run_tests.py new file mode 100755 index 0000000..fb7ce5c --- /dev/null +++ b/Tools/scripts/run_tests.py @@ -0,0 +1,48 @@ +"""Run Python's test suite in a fast, rigorous way. + +The defaults are meant to be reasonably thorough, while skipping certain +tests that can be time-consuming or resource-intensive (e.g. largefile), +or distracting (e.g. audio and gui). These defaults can be overridden by +simply passing a -u option to this script. + +""" + +import os +import sys +import test.support + + +def is_multiprocess_flag(arg): + return arg.startswith('-j') or arg.startswith('--multiprocess') + + +def is_resource_use_flag(arg): + return arg.startswith('-u') or arg.startswith('--use') + + +def main(regrtest_args): + args = [sys.executable, + '-W', 'default', # Warnings set to 'default' + '-bb', # Warnings about bytes/bytearray + '-E', # Ignore environment variables + '-R', # Randomize hashing + ] + # Allow user-specified interpreter options to override our defaults. 
+ args.extend(test.support.args_from_interpreter_flags()) + args.extend(['-m', 'test', # Run the test suite + '-r', # Randomize test order + '-w', # Re-run failed tests in verbose mode + ]) + if sys.platform == 'win32': + args.append('-n') # Silence alerts under Windows + if not any(is_multiprocess_flag(arg) for arg in regrtest_args): + args.extend(['-j', '0']) # Use all CPU cores + if not any(is_resource_use_flag(arg) for arg in regrtest_args): + args.extend(['-u', 'all,-largefile,-audio,-gui']) + args.extend(regrtest_args) + print(' '.join(args)) + os.execv(sys.executable, args) + + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/Tools/unicode/comparecodecs.py b/Tools/unicode/comparecodecs.py index 0f5c1e2..7de14fd 100644 --- a/Tools/unicode/comparecodecs.py +++ b/Tools/unicode/comparecodecs.py @@ -14,7 +14,7 @@ def compare_codecs(encoding1, encoding2): print('Comparing encoding/decoding of %r and %r' % (encoding1, encoding2)) mismatch = 0 # Check encoding - for i in range(sys.maxunicode): + for i in range(sys.maxunicode+1): u = chr(i) try: c1 = u.encode(encoding1) diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py index d503190..17edc3c 100644 --- a/Tools/unicode/makeunicodedata.py +++ b/Tools/unicode/makeunicodedata.py @@ -21,11 +21,18 @@ # 2004-05-29 perky add east asian width information # 2006-03-10 mvl update to Unicode 4.1; add UCD 3.2 delta # 2008-06-11 gb add PRINTABLE_MASK for Atsuo Ishimoto's ascii() patch +# 2011-10-21 ezio add support for name aliases and named sequences +# 2012-01 benjamin add full case mappings # # written by Fredrik Lundh (fredrik@pythonware.com) # -import sys, os, zipfile +import os +import sys +import zipfile + +from textwrap import dedent +from operator import itemgetter SCRIPT = sys.argv[0] VERSION = "3.2" @@ -39,6 +46,19 @@ UNIHAN = "Unihan%s.zip" DERIVED_CORE_PROPERTIES = "DerivedCoreProperties%s.txt" DERIVEDNORMALIZATION_PROPS = "DerivedNormalizationProps%s.txt" LINE_BREAK = "LineBreak%s.txt" +NAME_ALIASES = "NameAliases%s.txt" +NAMED_SEQUENCES = "NamedSequences%s.txt" +SPECIAL_CASING = "SpecialCasing%s.txt" +CASE_FOLDING = "CaseFolding%s.txt" + +# Private Use Areas -- in planes 1, 15, 16 +PUA_1 = range(0xE000, 0xF900) +PUA_15 = range(0xF0000, 0xFFFFE) +PUA_16 = range(0x100000, 0x10FFFE) + +# we use this ranges of PUA_15 to store name aliases and named sequences +NAME_ALIASES_START = 0xF0000 +NAMED_SEQUENCES_START = 0xF0100 old_versions = ["3.2.0"] @@ -67,8 +87,10 @@ UPPER_MASK = 0x80 XID_START_MASK = 0x100 XID_CONTINUE_MASK = 0x200 PRINTABLE_MASK = 0x400 -NODELTA_MASK = 0x800 -NUMERIC_MASK = 0x1000 +NUMERIC_MASK = 0x800 +CASE_IGNORABLE_MASK = 0x1000 +CASED_MASK = 0x2000 +EXTENDED_CASE_MASK = 0x4000 # these ranges need to match unicodedata.c:is_unified_ideograph cjk_ranges = [ @@ -367,6 +389,7 @@ def makeunicodetype(unicode, trace): numeric = {} spaces = [] linebreaks = [] + extra_casing = [] for char in unicode.chars: record = unicode.table[char] @@ -379,7 +402,7 @@ def makeunicodetype(unicode, trace): delta = True if category in ["Lm", "Lt", "Lu", "Ll", "Lo"]: flags |= ALPHA_MASK - if category == "Ll": + if "Lowercase" in properties: flags |= LOWER_MASK if 'Line_Break' in properties or bidirectional == "B": flags |= LINEBREAK_MASK @@ -389,7 +412,7 @@ def makeunicodetype(unicode, trace): spaces.append(char) if category == "Lt": flags |= TITLE_MASK - if category == "Lu": + if "Uppercase" in properties: flags |= UPPER_MASK if char == ord(" ") or category[0] not in ("C", "Z"): flags |= PRINTABLE_MASK @@ 
-397,7 +420,12 @@ def makeunicodetype(unicode, trace): flags |= XID_START_MASK if "XID_Continue" in properties: flags |= XID_CONTINUE_MASK - # use delta predictor for upper/lower/title if it fits + if "Cased" in properties: + flags |= CASED_MASK + if "Case_Ignorable" in properties: + flags |= CASE_IGNORABLE_MASK + sc = unicode.special_casing.get(char) + cf = unicode.case_folding.get(char, [char]) if record[12]: upper = int(record[12], 16) else: @@ -409,23 +437,39 @@ def makeunicodetype(unicode, trace): if record[14]: title = int(record[14], 16) else: - # UCD.html says that a missing title char means that - # it defaults to the uppercase character, not to the - # character itself. Apparently, in the current UCD (5.x) - # this feature is never used title = upper - upper_d = upper - char - lower_d = lower - char - title_d = title - char - if -32768 <= upper_d <= 32767 and \ - -32768 <= lower_d <= 32767 and \ - -32768 <= title_d <= 32767: - # use deltas - upper = upper_d & 0xffff - lower = lower_d & 0xffff - title = title_d & 0xffff + if sc is None and cf != [lower]: + sc = ([lower], [title], [upper]) + if sc is None: + if upper == lower == title: + upper = lower = title = 0 + else: + upper = upper - char + lower = lower - char + title = title - char + assert (abs(upper) <= 2147483647 and + abs(lower) <= 2147483647 and + abs(title) <= 2147483647) else: - flags |= NODELTA_MASK + # This happens either when some character maps to more than one + # character in uppercase, lowercase, or titlecase or the + # casefolded version of the character is different from the + # lowercase. The extra characters are stored in a different + # array. + flags |= EXTENDED_CASE_MASK + lower = len(extra_casing) | (len(sc[0]) << 24) + extra_casing.extend(sc[0]) + if cf != sc[0]: + lower |= len(cf) << 20 + extra_casing.extend(cf) + upper = len(extra_casing) | (len(sc[2]) << 24) + extra_casing.extend(sc[2]) + # Title is probably equal to upper. + if sc[1] == sc[2]: + title = upper + else: + title = len(extra_casing) | (len(sc[1]) << 24) + extra_casing.extend(sc[1]) # decimal digit, integer digit decimal = 0 if record[6]: @@ -452,6 +496,7 @@ def makeunicodetype(unicode, trace): print(sum(map(len, numeric.values())), "numeric code points") print(len(spaces), "whitespace code points") print(len(linebreaks), "linebreak code points") + print(len(extra_casing), "extended case array") print("--- Writing", FILE, "...") @@ -465,6 +510,14 @@ def makeunicodetype(unicode, trace): print("};", file=fp) print(file=fp) + print("/* extended case mappings */", file=fp) + print(file=fp) + print("const Py_UCS4 _PyUnicode_ExtendedCase[] = {", file=fp) + for c in extra_casing: + print(" %d," % c, file=fp) + print("};", file=fp) + print(file=fp) + # split decomposition index table index1, index2, shift = splitbins(index, trace) @@ -692,6 +745,39 @@ def makeunicodename(unicode, trace): print("/* name->code dictionary */", file=fp) codehash.dump(fp, trace) + print(file=fp) + print('static const unsigned int aliases_start = %#x;' % + NAME_ALIASES_START, file=fp) + print('static const unsigned int aliases_end = %#x;' % + (NAME_ALIASES_START + len(unicode.aliases)), file=fp) + + print('static const unsigned int name_aliases[] = {', file=fp) + for name, codepoint in unicode.aliases: + print(' 0x%04X,' % codepoint, file=fp) + print('};', file=fp) + + # In Unicode 6.0.0, the sequences contain at most 4 BMP chars, + # so we are using Py_UCS2 seq[4]. This needs to be updated if longer + # sequences or sequences with non-BMP chars are added. 
+ # unicodedata_lookup should be adapted too. + print(dedent(""" + typedef struct NamedSequence { + int seqlen; + Py_UCS2 seq[4]; + } named_sequence; + """), file=fp) + + print('static const unsigned int named_sequences_start = %#x;' % + NAMED_SEQUENCES_START, file=fp) + print('static const unsigned int named_sequences_end = %#x;' % + (NAMED_SEQUENCES_START + len(unicode.named_sequences)), file=fp) + + print('static const named_sequence named_sequences[] = {', file=fp) + for name, sequence in unicode.named_sequences: + seq_str = ', '.join('0x%04X' % cp for cp in sequence) + print(' {%d, {%s}},' % (len(sequence), seq_str), file=fp) + print('};', file=fp) + fp.close() @@ -726,7 +812,11 @@ def merge_old_version(version, new, old): for k in range(len(old.table[i])): if old.table[i][k] != new.table[i][k]: value = old.table[i][k] - if k == 2: + if k == 1 and i in PUA_15: + # the name is not set in the old.table, but in the + # new.table we are using it for aliases and named seq + assert value == '' + elif k == 2: #print "CATEGORY",hex(i), old.table[i][k], new.table[i][k] category_changes[i] = CATEGORY_NAMES.index(value) elif k == 4: @@ -816,15 +906,15 @@ class UnicodeData: expand=1, cjk_check=True): self.changed = [] - file = open_data(UNICODE_DATA, version) table = [None] * 0x110000 - while 1: - s = file.readline() - if not s: - break - s = s.strip().split(";") - char = int(s[0], 16) - table[char] = s + with open_data(UNICODE_DATA, version) as file: + while 1: + s = file.readline() + if not s: + break + s = s.strip().split(";") + char = int(s[0], 16) + table[char] = s cjk_ranges_found = [] @@ -855,32 +945,78 @@ class UnicodeData: self.table = table self.chars = list(range(0x110000)) # unicode 3.2 - file = open_data(COMPOSITION_EXCLUSIONS, version) + # check for name aliases and named sequences, see #12753 + # aliases and named sequences are not in 3.2.0 + if version != '3.2.0': + self.aliases = [] + # store aliases in the Private Use Area 15, in range U+F0000..U+F00FF, + # in order to take advantage of the compression and lookup + # algorithms used for the other characters + pua_index = NAME_ALIASES_START + with open_data(NAME_ALIASES, version) as file: + for s in file: + s = s.strip() + if not s or s.startswith('#'): + continue + char, name = s.split(';') + char = int(char, 16) + self.aliases.append((name, char)) + # also store the name in the PUA 1 + self.table[pua_index][1] = name + pua_index += 1 + assert pua_index - NAME_ALIASES_START == len(self.aliases) + + self.named_sequences = [] + # store named seqences in the PUA 1, in range U+F0100.., + # in order to take advantage of the compression and lookup + # algorithms used for the other characters. 
+ + pua_index = NAMED_SEQUENCES_START + with open_data(NAMED_SEQUENCES, version) as file: + for s in file: + s = s.strip() + if not s or s.startswith('#'): + continue + name, chars = s.split(';') + chars = tuple(int(char, 16) for char in chars.split()) + # check that the structure defined in makeunicodename is OK + assert 2 <= len(chars) <= 4, "change the Py_UCS2 array size" + assert all(c <= 0xFFFF for c in chars), ("use Py_UCS4 in " + "the NamedSequence struct and in unicodedata_lookup") + self.named_sequences.append((name, chars)) + # also store these in the PUA 1 + self.table[pua_index][1] = name + pua_index += 1 + assert pua_index - NAMED_SEQUENCES_START == len(self.named_sequences) + self.exclusions = {} - for s in file: - s = s.strip() - if not s: - continue - if s[0] == '#': - continue - char = int(s.split()[0],16) - self.exclusions[char] = 1 + with open_data(COMPOSITION_EXCLUSIONS, version) as file: + for s in file: + s = s.strip() + if not s: + continue + if s[0] == '#': + continue + char = int(s.split()[0],16) + self.exclusions[char] = 1 widths = [None] * 0x110000 - for s in open_data(EASTASIAN_WIDTH, version): - s = s.strip() - if not s: - continue - if s[0] == '#': - continue - s = s.split()[0].split(';') - if '..' in s[0]: - first, last = [int(c, 16) for c in s[0].split('..')] - chars = list(range(first, last+1)) - else: - chars = [int(s[0], 16)] - for char in chars: - widths[char] = s[1] + with open_data(EASTASIAN_WIDTH, version) as file: + for s in file: + s = s.strip() + if not s: + continue + if s[0] == '#': + continue + s = s.split()[0].split(';') + if '..' in s[0]: + first, last = [int(c, 16) for c in s[0].split('..')] + chars = list(range(first, last+1)) + else: + chars = [int(s[0], 16)] + for char in chars: + widths[char] = s[1] + for i in range(0, 0x110000): if table[i] is not None: table[i].append(widths[i]) @@ -888,36 +1024,39 @@ class UnicodeData: for i in range(0, 0x110000): if table[i] is not None: table[i].append(set()) - for s in open_data(DERIVED_CORE_PROPERTIES, version): - s = s.split('#', 1)[0].strip() - if not s: - continue - r, p = s.split(";") - r = r.strip() - p = p.strip() - if ".." in r: - first, last = [int(c, 16) for c in r.split('..')] - chars = list(range(first, last+1)) - else: - chars = [int(r, 16)] - for char in chars: - if table[char]: - # Some properties (e.g. Default_Ignorable_Code_Point) - # apply to unassigned code points; ignore them - table[char][-1].add(p) - - for s in open_data(LINE_BREAK, version): - s = s.partition('#')[0] - s = [i.strip() for i in s.split(';')] - if len(s) < 2 or s[1] not in MANDATORY_LINE_BREAKS: - continue - if '..' not in s[0]: - first = last = int(s[0], 16) - else: - first, last = [int(c, 16) for c in s[0].split('..')] - for char in range(first, last+1): - table[char][-1].add('Line_Break') + with open_data(DERIVED_CORE_PROPERTIES, version) as file: + for s in file: + s = s.split('#', 1)[0].strip() + if not s: + continue + + r, p = s.split(";") + r = r.strip() + p = p.strip() + if ".." in r: + first, last = [int(c, 16) for c in r.split('..')] + chars = list(range(first, last+1)) + else: + chars = [int(r, 16)] + for char in chars: + if table[char]: + # Some properties (e.g. Default_Ignorable_Code_Point) + # apply to unassigned code points; ignore them + table[char][-1].add(p) + + with open_data(LINE_BREAK, version) as file: + for s in file: + s = s.partition('#')[0] + s = [i.strip() for i in s.split(';')] + if len(s) < 2 or s[1] not in MANDATORY_LINE_BREAKS: + continue + if '..' 
not in s[0]: + first = last = int(s[0], 16) + else: + first, last = [int(c, 16) for c in s[0].split('..')] + for char in range(first, last+1): + table[char][-1].add('Line_Break') # We only want the quickcheck properties # Format: NF?_QC; Y(es)/N(o)/M(aybe) @@ -928,31 +1067,33 @@ class UnicodeData: # for older versions, and no delta records will be created. quickchecks = [0] * 0x110000 qc_order = 'NFD_QC NFKD_QC NFC_QC NFKC_QC'.split() - for s in open_data(DERIVEDNORMALIZATION_PROPS, version): - if '#' in s: - s = s[:s.index('#')] - s = [i.strip() for i in s.split(';')] - if len(s) < 2 or s[1] not in qc_order: - continue - quickcheck = 'MN'.index(s[2]) + 1 # Maybe or No - quickcheck_shift = qc_order.index(s[1])*2 - quickcheck <<= quickcheck_shift - if '..' not in s[0]: - first = last = int(s[0], 16) - else: - first, last = [int(c, 16) for c in s[0].split('..')] - for char in range(first, last+1): - assert not (quickchecks[char]>>quickcheck_shift)&3 - quickchecks[char] |= quickcheck + with open_data(DERIVEDNORMALIZATION_PROPS, version) as file: + for s in file: + if '#' in s: + s = s[:s.index('#')] + s = [i.strip() for i in s.split(';')] + if len(s) < 2 or s[1] not in qc_order: + continue + quickcheck = 'MN'.index(s[2]) + 1 # Maybe or No + quickcheck_shift = qc_order.index(s[1])*2 + quickcheck <<= quickcheck_shift + if '..' not in s[0]: + first = last = int(s[0], 16) + else: + first, last = [int(c, 16) for c in s[0].split('..')] + for char in range(first, last+1): + assert not (quickchecks[char]>>quickcheck_shift)&3 + quickchecks[char] |= quickcheck for i in range(0, 0x110000): if table[i] is not None: table[i].append(quickchecks[i]) - zip = zipfile.ZipFile(open_data(UNIHAN, version)) - if version == '3.2.0': - data = zip.open('Unihan-3.2.0.txt').read() - else: - data = zip.open('Unihan_NumericValues.txt').read() + with open_data(UNIHAN, version) as file: + zip = zipfile.ZipFile(file) + if version == '3.2.0': + data = zip.open('Unihan-3.2.0.txt').read() + else: + data = zip.open('Unihan_NumericValues.txt').read() for line in data.decode("utf-8").splitlines(): if not line.startswith('U+'): continue @@ -965,6 +1106,34 @@ class UnicodeData: # Patch the numeric field if table[i] is not None: table[i][8] = value + sc = self.special_casing = {} + with open_data(SPECIAL_CASING, version) as file: + for s in file: + s = s[:-1].split('#', 1)[0] + if not s: + continue + data = s.split("; ") + if data[4]: + # We ignore all conditionals (since they depend on + # languages) except for one, which is hardcoded. See + # handle_capital_sigma in unicodeobject.c. + continue + c = int(data[0], 16) + lower = [int(char, 16) for char in data[1].split()] + title = [int(char, 16) for char in data[2].split()] + upper = [int(char, 16) for char in data[3].split()] + sc[c] = (lower, title, upper) + cf = self.case_folding = {} + if version != '3.2.0': + with open_data(CASE_FOLDING, version) as file: + for s in file: + s = s[:-1].split('#', 1)[0] + if not s: + continue + data = s.split("; ") + if data[1] in "CF": + c = int(data[0], 16) + cf[c] = [int(char, 16) for char in data[2].split()] def uselatin1(self): # restrict character range to ISO Latin 1 diff --git a/Tools/unittestgui/unittestgui.py b/Tools/unittestgui/unittestgui.py index b526646..09a20e2 100644 --- a/Tools/unittestgui/unittestgui.py +++ b/Tools/unittestgui/unittestgui.py @@ -28,7 +28,6 @@ SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
""" __author__ = "Steve Purcell (stephen_purcell@yahoo.com)" -__version__ = "$Revision: 1.7 $"[11:-2] import sys import traceback |