diff options
Diffstat (limited to 'Tools')
-rw-r--r-- | Tools/buildbot/external-common.bat | 5 | ||||
-rw-r--r-- | Tools/gdb/libpython.py | 95 | ||||
-rw-r--r-- | Tools/iobench/iobench.py | 19 | ||||
-rw-r--r-- | Tools/msi/msi.py | 160 | ||||
-rw-r--r-- | Tools/msi/msilib.py | 18 | ||||
-rw-r--r-- | Tools/msi/uuids.py | 98 | ||||
-rwxr-xr-x | Tools/pybench/pybench.py | 1 | ||||
-rw-r--r-- | Tools/scripts/README | 3 | ||||
-rwxr-xr-x | Tools/scripts/findnocoding.py | 4 | ||||
-rwxr-xr-x | Tools/scripts/patchcheck.py | 53 | ||||
-rwxr-xr-x | Tools/scripts/pysetup3 | 4 | ||||
-rwxr-xr-x | Tools/scripts/pysource.py | 2 | ||||
-rwxr-xr-x | Tools/scripts/reindent.py | 18 | ||||
-rwxr-xr-x | Tools/scripts/run_tests.py | 47 | ||||
-rw-r--r-- | Tools/unicode/comparecodecs.py | 2 | ||||
-rw-r--r-- | Tools/unicode/makeunicodedata.py | 380 | ||||
-rw-r--r-- | Tools/unittestgui/unittestgui.py | 1 |
17 files changed, 507 insertions, 403 deletions
diff --git a/Tools/buildbot/external-common.bat b/Tools/buildbot/external-common.bat index 1ff282e..244d5f5 100644 --- a/Tools/buildbot/external-common.bat +++ b/Tools/buildbot/external-common.bat @@ -41,3 +41,8 @@ if not exist sqlite-3.7.4 ( rd /s/q sqlite-source-3.6.21 svn export http://svn.python.org/projects/external/sqlite-3.7.4 ) + +@rem lzma +if not exist xz-5.0.3 ( + svn export http://svn.python.org/projects/external/xz-5.0.3 +) diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index 8bbbb10..30347cb 100644 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -49,6 +49,11 @@ import sys _type_char_ptr = gdb.lookup_type('char').pointer() # char* _type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() # unsigned char* _type_void_ptr = gdb.lookup_type('void').pointer() # void* +_type_unsigned_short_ptr = gdb.lookup_type('unsigned short').pointer() +_type_unsigned_int_ptr = gdb.lookup_type('unsigned int').pointer() + +# value computed later, see PyUnicodeObjectPtr.proxy() +_is_pep393 = None SIZEOF_VOID_P = _type_void_ptr.sizeof @@ -322,7 +327,6 @@ class PyObjectPtr(object): name_map = {'bool': PyBoolObjectPtr, 'classobj': PyClassObjectPtr, - 'instance': PyInstanceObjectPtr, 'NoneType': PyNoneStructPtr, 'frame': PyFrameObjectPtr, 'set' : PySetObjectPtr, @@ -396,7 +400,7 @@ class ProxyAlreadyVisited(object): def _write_instance_repr(out, visited, name, pyop_attrdict, address): - '''Shared code for use by old-style and new-style classes: + '''Shared code for use by all classes: write a representation to file-like object "out"''' out.write('<') out.write(name) @@ -479,7 +483,7 @@ class HeapTypeObjectPtr(PyObjectPtr): def proxyval(self, visited): ''' - Support for new-style classes. + Support for classes. Currently we just locate the dictionary using a transliteration to python of _PyObject_GetDictPtr, ignoring descriptors @@ -496,7 +500,7 @@ class HeapTypeObjectPtr(PyObjectPtr): attr_dict = {} tp_name = self.safe_tp_name() - # New-style class: + # Class: return InstanceProxy(tp_name, attr_dict, long(self._gdbval)) def write_repr(self, out, visited): @@ -668,44 +672,6 @@ class PyDictObjectPtr(PyObjectPtr): pyop_value.write_repr(out, visited) out.write('}') -class PyInstanceObjectPtr(PyObjectPtr): - _typename = 'PyInstanceObject' - - def proxyval(self, visited): - # Guard against infinite loops: - if self.as_address() in visited: - return ProxyAlreadyVisited('<...>') - visited.add(self.as_address()) - - # Get name of class: - in_class = self.pyop_field('in_class') - cl_name = in_class.pyop_field('cl_name').proxyval(visited) - - # Get dictionary of instance attributes: - in_dict = self.pyop_field('in_dict').proxyval(visited) - - # Old-style class: - return InstanceProxy(cl_name, in_dict, long(self._gdbval)) - - def write_repr(self, out, visited): - # Guard against infinite loops: - if self.as_address() in visited: - out.write('<...>') - return - visited.add(self.as_address()) - - # Old-style class: - - # Get name of class: - in_class = self.pyop_field('in_class') - cl_name = in_class.pyop_field('cl_name').proxyval(visited) - - # Get dictionary of instance attributes: - pyop_in_dict = self.pyop_field('in_dict') - - _write_instance_repr(out, visited, - cl_name, pyop_in_dict, self.as_address()) - class PyListObjectPtr(PyObjectPtr): _typename = 'PyListObject' @@ -1123,15 +1089,46 @@ class PyUnicodeObjectPtr(PyObjectPtr): return _type_Py_UNICODE.sizeof def proxyval(self, visited): - # From unicodeobject.h: - # Py_ssize_t length; /* Length of raw Unicode data in buffer */ - # Py_UNICODE *str; /* Raw Unicode buffer */ - field_length = long(self.field('length')) - field_str = self.field('str') + global _is_pep393 + if _is_pep393 is None: + fields = gdb.lookup_type('PyUnicodeObject').target().fields() + _is_pep393 = 'data' in [f.name for f in fields] + if _is_pep393: + # Python 3.3 and newer + may_have_surrogates = False + compact = self.field('_base') + ascii = compact['_base'] + state = ascii['state'] + is_compact_ascii = (int(state['ascii']) and int(state['compact'])) + if not int(state['ready']): + # string is not ready + field_length = long(compact['wstr_length']) + may_have_surrogates = True + field_str = ascii['wstr'] + else: + field_length = long(ascii['length']) + if is_compact_ascii: + field_str = ascii.address + 1 + elif int(state['compact']): + field_str = compact.address + 1 + else: + field_str = self.field('data')['any'] + repr_kind = int(state['kind']) + if repr_kind == 1: + field_str = field_str.cast(_type_unsigned_char_ptr) + elif repr_kind == 2: + field_str = field_str.cast(_type_unsigned_short_ptr) + elif repr_kind == 4: + field_str = field_str.cast(_type_unsigned_int_ptr) + else: + # Python 3.2 and earlier + field_length = long(self.field('length')) + field_str = self.field('str') + may_have_surrogates = self.char_width() == 2 # Gather a list of ints from the Py_UNICODE array; these are either - # UCS-2 or UCS-4 code points: - if self.char_width() > 2: + # UCS-1, UCS-2 or UCS-4 code points: + if not may_have_surrogates: Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)] else: # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the diff --git a/Tools/iobench/iobench.py b/Tools/iobench/iobench.py index 5ec6f17..408be7b 100644 --- a/Tools/iobench/iobench.py +++ b/Tools/iobench/iobench.py @@ -1,13 +1,14 @@ # -*- coding: utf-8 -*- # This file should be kept compatible with both Python 2.6 and Python >= 3.0. -import time +import functools +import hashlib +import itertools import os +import platform import re import sys -import hashlib -import functools -import itertools +import time from optparse import OptionParser out = sys.stdout @@ -307,6 +308,16 @@ def run_all_tests(options): "large": 2, } + print("Python %s" % sys.version) + if sys.version_info < (3, 3): + if sys.maxunicode > 0xffff: + text = "UCS-4 (wide build)" + else: + text = "UTF-16 (narrow build)" + else: + text = "PEP 393" + print("Unicode: %s" % text) + print(platform.platform()) binary_files = list(get_binary_files()) text_files = list(get_text_files()) if "b" in options: diff --git a/Tools/msi/msi.py b/Tools/msi/msi.py index 508816d..a18debc 100644 --- a/Tools/msi/msi.py +++ b/Tools/msi/msi.py @@ -2,12 +2,11 @@ # (C) 2003 Martin v. Loewis # See "FOO" in comments refers to MSDN sections with the title FOO. import msilib, schema, sequence, os, glob, time, re, shutil, zipfile +import subprocess, tempfile from msilib import Feature, CAB, Directory, Dialog, Binary, add_data import uisample from win32com.client import constants from distutils.spawn import find_executable -from uuids import product_codes -import tempfile # Settings can be overridden in config.py below # 0 for official python.org releases @@ -77,19 +76,16 @@ upgrade_code_64='{6A965A0C-6EE6-4E3A-9983-3263F56311EC}' if snapshot: current_version = "%s.%s.%s" % (major, minor, int(time.time()/3600/24)) - product_code = msilib.gen_uuid() -else: - product_code = product_codes[current_version] if full_current_version is None: full_current_version = current_version extensions = [ - 'bz2.pyd', 'pyexpat.pyd', 'select.pyd', 'unicodedata.pyd', 'winsound.pyd', + '_bz2.pyd', '_elementtree.pyd', '_socket.pyd', '_ssl.pyd', @@ -100,7 +96,8 @@ extensions = [ '_ctypes_test.pyd', '_sqlite3.pyd', '_hashlib.pyd', - '_multiprocessing.pyd' + '_multiprocessing.pyd', + '_lzma.pyd' ] # Well-known component UUIDs @@ -119,6 +116,7 @@ pythondll_uuid = { "30":"{6953bc3b-6768-4291-8410-7914ce6e2ca8}", "31":"{4afcba0b-13e4-47c3-bebe-477428b46913}", "32":"{3ff95315-1096-4d31-bd86-601d5438ad5e}", + "33":"{f7581ca4-d368-4eea-8f82-d48c64c4f047}", } [major+minor] # Compute the name that Sphinx gives to the docfile @@ -185,12 +183,19 @@ dll_path = os.path.join(srcdir, PCBUILD, dll_file) msilib.set_arch_from_file(dll_path) if msilib.pe_type(dll_path) != msilib.pe_type("msisupport.dll"): raise SystemError("msisupport.dll for incorrect architecture") + if msilib.Win64: upgrade_code = upgrade_code_64 - # Bump the last digit of the code by one, so that 32-bit and 64-bit - # releases get separate product codes - digit = hex((int(product_code[-2],16)+1)%16)[-1] - product_code = product_code[:-2] + digit + '}' + +if snapshot: + product_code = msilib.gen_uuid() +else: + # official release: generate UUID from the download link that the file will have + import uuid + product_code = uuid.uuid3(uuid.NAMESPACE_URL, + 'http://www.python.org/ftp/python/%s.%s.%s/python-%s%s.msi' % + (major, minor, micro, full_current_version, msilib.arch_ext)) + product_code = '{%s}' % product_code if testpackage: ext = 'px' @@ -904,16 +909,27 @@ class PyDirectory(Directory): kw['componentflags'] = 2 #msidbComponentAttributesOptional Directory.__init__(self, *args, **kw) - def check_unpackaged(self): - self.unpackaged_files.discard('__pycache__') - self.unpackaged_files.discard('.svn') - if self.unpackaged_files: - print "Warning: Unpackaged files in %s" % self.absolute - print self.unpackaged_files +def hgmanifest(): + # Fetch file list from Mercurial + process = subprocess.Popen(['hg', 'manifest'], stdout=subprocess.PIPE) + stdout, stderr = process.communicate() + # Create nested directories for file tree + result = {} + for line in stdout.splitlines(): + components = line.split('/') + d = result + while len(components) > 1: + d1 = d.setdefault(components[0], {}) + d = d1 + del components[0] + d[components[0]] = None + return result + # See "File Table", "Component Table", "Directory Table", # "FeatureComponents Table" def add_files(db): + hgfiles = hgmanifest() cab = CAB("python") tmpfiles = [] # Add all executables, icons, text files into the TARGETDIR component @@ -975,104 +991,40 @@ def add_files(db): # Add all .py files in Lib, except tkinter, test dirs = [] - pydirs = [(root,"Lib")] + pydirs = [(root, "Lib", hgfiles["Lib"], default_feature)] while pydirs: # Commit every now and then, or else installer will complain db.Commit() - parent, dir = pydirs.pop() - if dir == ".svn" or dir == '__pycache__' or dir.startswith("plat-"): + parent, dir, files, feature = pydirs.pop() + if dir.startswith("plat-"): continue - elif dir in ["tkinter", "idlelib", "Icons"]: + if dir in ["tkinter", "idlelib", "turtledemo"]: if not have_tcl: continue + feature = tcltk tcltk.set_current() - elif dir in ['test', 'tests', 'data', 'output']: - # test: Lib, Lib/email, Lib/ctypes, Lib/sqlite3 - # tests: Lib/distutils - # data: Lib/email/test - # output: Lib/test - testsuite.set_current() + elif dir in ('test', 'tests'): + feature = testsuite elif not have_ctypes and dir == "ctypes": continue - else: - default_feature.set_current() + feature.set_current() lib = PyDirectory(db, cab, parent, dir, dir, "%s|%s" % (parent.make_short(dir), dir)) - # Add additional files dirs.append(lib) - lib.glob("*.txt") - if dir=='site-packages': - lib.add_file("README.txt", src="README") - continue - files = lib.glob("*.py") - files += lib.glob("*.pyw") - if files: - # Add an entry to the RemoveFile table to remove bytecode files. - lib.remove_pyc() - # package READMEs if present - lib.glob("README") - if dir=='Lib': - lib.add_file('wsgiref.egg-info') - if dir=='test' and parent.physical=='Lib': - lib.add_file("185test.db") - lib.add_file("audiotest.au") - lib.add_file("sgml_input.html") - lib.add_file("testtar.tar") - lib.add_file("test_difflib_expect.html") - lib.add_file("check_soundcard.vbs") - lib.add_file("empty.vbs") - lib.add_file("Sine-1000Hz-300ms.aif") - lib.add_file("mime.types") - lib.glob("*.uue") - lib.glob("*.pem") - lib.glob("*.pck") - lib.glob("cfgparser.*") - lib.add_file("zip_cp437_header.zip") - lib.add_file("zipdir.zip") - if dir=='capath': - lib.glob("*.0") - if dir=='tests' and parent.physical=='distutils': - lib.add_file("Setup.sample") - if dir=='decimaltestdata': - lib.glob("*.decTest") - if dir=='xmltestdata': - lib.glob("*.xml") - lib.add_file("test.xml.out") - if dir=='output': - lib.glob("test_*") - if dir=='sndhdrdata': - lib.glob("sndhdr.*") - if dir=='idlelib': - lib.glob("*.def") - lib.add_file("idle.bat") - lib.add_file("ChangeLog") - if dir=="Icons": - lib.glob("*.gif") - lib.add_file("idle.icns") - if dir=="command" and parent.physical=="distutils": - lib.glob("wininst*.exe") - lib.add_file("command_template") - if dir=="lib2to3": - lib.removefile("pickle", "*.pickle") - if dir=="macholib": - lib.add_file("README.ctypes") - lib.glob("fetch_macholib*") - if dir=='turtledemo': - lib.add_file("turtle.cfg") - if dir=="pydoc_data": - lib.add_file("_pydoc.css") - if dir=="data" and parent.physical=="test" and parent.basedir.physical=="email": - # This should contain all non-.svn files listed in subversion - for f in os.listdir(lib.absolute): - if f.endswith(".txt") or f==".svn":continue - if f.endswith(".au") or f.endswith(".gif"): - lib.add_file(f) + has_py = False + for name, subdir in files.items(): + if subdir is None: + assert os.path.isfile(os.path.join(lib.absolute, name)) + if name == 'README': + lib.add_file("README.txt", src="README") else: - print("WARNING: New file %s in email/test/data" % f) - for f in os.listdir(lib.absolute): - if os.path.isdir(os.path.join(lib.absolute, f)): - pydirs.append((lib, f)) - for d in dirs: - d.check_unpackaged() + lib.add_file(name) + has_py = has_py or name.endswith(".py") or name.endswith(".pyw") + else: + assert os.path.isdir(os.path.join(lib.absolute, name)) + pydirs.append((lib, name, subdir, feature)) + + if has_py: + lib.remove_pyc() # Add DLLs default_feature.set_current() lib = DLLs @@ -1159,6 +1111,8 @@ def add_files(db): lib.add_file("README.txt", src="README") if f == 'Scripts': lib.add_file("2to3.py", src="2to3") + lib.add_file("pydoc3.py", src="pydoc3") + lib.add_file("pysetup3.py", src="pysetup3") if have_tcl: lib.start_component("pydocgui.pyw", tcltk, keyfile="pydocgui.pyw") lib.add_file("pydocgui.pyw") diff --git a/Tools/msi/msilib.py b/Tools/msi/msilib.py index 5795d0e..472d9d4 100644 --- a/Tools/msi/msilib.py +++ b/Tools/msi/msilib.py @@ -408,7 +408,7 @@ class Directory: self.physical = physical self.logical = logical self.component = None - self.short_names = sets.Set() + self.short_names = {} self.ids = sets.Set() self.keyfiles = {} self.componentflags = componentflags @@ -456,23 +456,25 @@ class Directory: [(feature.id, component)]) def make_short(self, file): + long = file file = re.sub(r'[\?|><:/*"+,;=\[\]]', '_', file) # restrictions on short names - parts = file.split(".") + parts = file.split(".", 1) if len(parts)>1: - suffix = parts[-1].upper() + suffix = parts[1].upper() else: - suffix = None + suffix = '' prefix = parts[0].upper() - if len(prefix) <= 8 and (not suffix or len(suffix)<=3): + if len(prefix) <= 8 and '.' not in suffix and len(suffix) <= 3: if suffix: file = prefix+"."+suffix else: file = prefix - assert file not in self.short_names + assert file not in self.short_names, (file, self.short_names[file]) else: prefix = prefix[:6] if suffix: - suffix = suffix[:3] + # last three characters of last suffix + suffix = suffix.rsplit('.')[-1][:3] pos = 1 while 1: if suffix: @@ -484,7 +486,7 @@ class Directory: assert pos < 10000 if pos in (10, 100, 1000): prefix = prefix[:-1] - self.short_names.add(file) + self.short_names[file] = long return file def add_file(self, file, src=None, version=None, language=None): diff --git a/Tools/msi/uuids.py b/Tools/msi/uuids.py deleted file mode 100644 index e06cdfb..0000000 --- a/Tools/msi/uuids.py +++ /dev/null @@ -1,98 +0,0 @@ -# This should be extended for each Python release. -# The product code must change whenever the name of the MSI file -# changes, and when new component codes are issued for existing -# components. See "Changing the Product Code". As we change the -# component codes with every build, we need a new product code -# each time. For intermediate (snapshot) releases, they are automatically -# generated. For official releases, we record the product codes, -# so people can refer to them. -product_codes = { - '2.5.101': '{bc14ce3e-5e72-4a64-ac1f-bf59a571898c}', # 2.5a1 - '2.5.102': '{5eed51c1-8e9d-4071-94c5-b40de5d49ba5}', # 2.5a2 - '2.5.103': '{73dcd966-ffec-415f-bb39-8342c1f47017}', # 2.5a3 - '2.5.111': '{c797ecf8-a8e6-4fec-bb99-526b65f28626}', # 2.5b1 - '2.5.112': '{32beb774-f625-439d-b587-7187487baf15}', # 2.5b2 - '2.5.113': '{89f23918-11cf-4f08-be13-b9b2e6463fd9}', # 2.5b3 - '2.5.121': '{8e9321bc-6b24-48a3-8fd4-c95f8e531e5f}', # 2.5c1 - '2.5.122': '{a6cd508d-9599-45da-a441-cbffa9f7e070}', # 2.5c2 - '2.5.150': '{0a2c5854-557e-48c8-835a-3b9f074bdcaa}', # 2.5.0 - '2.5.1121':'{0378b43e-6184-4c2f-be1a-4a367781cd54}', # 2.5.1c1 - '2.5.1150':'{31800004-6386-4999-a519-518f2d78d8f0}', # 2.5.1 - '2.5.2150':'{6304a7da-1132-4e91-a343-a296269eab8a}', # 2.5.2c1 - '2.5.2150':'{6b976adf-8ae8-434e-b282-a06c7f624d2f}', # 2.5.2 - '2.6.101': '{0ba82e1b-52fd-4e03-8610-a6c76238e8a8}', # 2.6a1 - '2.6.102': '{3b27e16c-56db-4570-a2d3-e9a26180c60b}', # 2.6a2 - '2.6.103': '{cd06a9c5-bde5-4bd7-9874-48933997122a}', # 2.6a3 - '2.6.104': '{dc6ed634-474a-4a50-a547-8de4b7491e53}', # 2.6a4 - '2.6.111': '{3f82079a-5bee-4c4a-8a41-8292389e24ae}', # 2.6b1 - '2.6.112': '{8a0e5970-f3e6-4737-9a2b-bc5ff0f15fb5}', # 2.6b2 - '2.6.113': '{df4f5c21-6fcc-4540-95de-85feba634e76}', # 2.6b3 - '2.6.121': '{bbd34464-ddeb-4028-99e5-f16c4a8fbdb3}', # 2.6c1 - '2.6.122': '{8f64787e-a023-4c60-bfee-25d3a3f592c6}', # 2.6c2 - '2.6.150': '{110eb5c4-e995-4cfb-ab80-a5f315bea9e8}', # 2.6.0 - '2.6.1150':'{9cc89170-000b-457d-91f1-53691f85b223}', # 2.6.1 - '2.6.2121':'{adac412b-b209-4c15-b6ab-dca1b6e47144}', # 2.6.2c1 - '2.6.2150':'{24aab420-4e30-4496-9739-3e216f3de6ae}', # 2.6.2 - '2.6.3121':'{a73e0254-dcda-4fe4-bf37-c7e1c4f4ebb6}', # 2.6.3c1 - '2.6.3150':'{3d9ac095-e115-4e94-bdef-7f7edf17697d}', # 2.6.3 - '2.6.4121':'{727de605-0359-4606-a94b-c2033652379b}', # 2.6.4c1 - '2.6.4122':'{4f7603c6-6352-4299-a398-150a31b19acc}', # 2.6.4c2 - '2.6.4150':'{e7394a0f-3f80-45b1-87fc-abcd51893246}', # 2.6.4 - '2.6.5121':'{e0e273d7-7598-4701-8325-c90c069fd5ff}', # 2.6.5c1 - '2.6.5122':'{fa227b76-0671-4dc6-b826-c2ff2a70dfd5}', # 2.6.5c2 - '2.6.5150':'{4723f199-fa64-4233-8e6e-9fccc95a18ee}', # 2.6.5 - '2.7.101': '{eca1bbef-432c-49ae-a667-c213cc7bbf22}', # 2.7a1 - '2.7.102': '{21ce16ed-73c4-460d-9b11-522f417b2090}', # 2.7a2 - '2.7.103': '{6e7dbd55-ba4a-48ac-a688-6c75db4d7500}', # 2.7a3 - '2.7.104': '{ee774ba3-74a5-48d9-b425-b35a287260c8}', # 2.7a4 - '2.7.111': '{9cfd9ec7-a9c7-4980-a1c6-054fc6493eb3}', # 2.7b1 - '2.7.112': '{9a72faf6-c304-4165-8595-9291ff30cac6}', # 2.7b2 - '2.7.121': '{f530c94a-dd53-4de9-948e-b632b9cb48d2}', # 2.7c1 - '2.7.122': '{f80905d2-dd8d-4b8e-8a40-c23c93dca07d}', # 2.7c2 - '2.7.150': '{20c31435-2a0a-4580-be8b-ac06fc243ca4}', # 2.7.0 - '3.0.101': '{8554263a-3242-4857-9359-aa87bc2c58c2}', # 3.0a1 - '3.0.102': '{692d6e2c-f0ac-40b8-a133-7191aeeb67f9}', # 3.0a2 - '3.0.103': '{49cb2995-751a-4753-be7a-d0b1bb585e06}', # 3.0a3 - '3.0.104': '{87cb019e-19fd-4238-b1c7-85751437d646}', # 3.0a4 - '3.0.105': '{cf2659af-19ec-43d2-8c35-0f6a09439d42}', # 3.0a5 - '3.0.111': '{36c26f55-837d-45cf-848c-5f5c0fb47a28}', # 3.0b1 - '3.0.112': '{056a0fbc-c8fe-4c61-aade-c4411b70c998}', # 3.0b2 - '3.0.113': '{2b2e89a9-83af-43f9-b7d5-96e80c5a3f26}', # 3.0b3 - '3.0.114': '{e95c31af-69be-4dd7-96e6-e5fc85e660e6}', # 3.0b4 - '3.0.121': '{d0979c5e-cd3c-42ec-be4c-e294da793573}', # 3.0c1 - '3.0.122': '{f707b8e9-a257-4045-818e-4923fc20fbb6}', # 3.0c2 - '3.0.123': '{5e7208f1-8643-4ea2-ab5e-4644887112e3}', # 3.0c3 - '3.0.150': '{e0e56e21-55de-4f77-a109-1baa72348743}', # 3.0.0 - '3.0.1121':'{d35b1ea5-3d70-4872-bf7e-cd066a77a9c9}', # 3.0.1c1 - '3.0.1150':'{de2f2d9c-53e2-40ee-8209-74da63cb060e}', # 3.0.1 - '3.0.2121':'{cef79e7f-9809-49e2-afd2-e24148d7c855}', # 3.0.2c1 - '3.0.2150':'{0cf3b95a-8382-4607-9779-c36407ff362c}', # 3.0.2 - '3.1.101': '{c423eada-c498-4d51-9eb4-bfeae647e0a0}', # 3.1a1 - '3.1.102': '{f6e199bf-dc64-42f3-87d4-1525991a013e}', # 3.1a2 - '3.1.111': '{c3c82893-69b2-4676-8554-1b6ee6c191e9}', # 3.1b1 - '3.1.121': '{da2b5170-12f3-4d99-8a1f-54926cca7acd}', # 3.1c1 - '3.1.122': '{bceb5133-e2ee-4109-951f-ac7e941a1692}', # 3.1c2 - '3.1.150': '{3ad61ee5-81d2-4d7e-adef-da1dd37277d1}', # 3.1.0 - '3.1.1121':'{5782f957-6d49-41d4-bad0-668715dfd638}', # 3.1.1c1 - '3.1.1150':'{7ff90460-89b7-435b-b583-b37b2815ccc7}', # 3.1.1 - '3.1.2121':'{ec45624a-378c-43be-91f3-3f7a59b0d90c}', # 3.1.2c1 - '3.1.2150':'{d40af016-506c-43fb-a738-bd54fa8c1e85}', # 3.1.2 - '3.2.101' :'{b411f168-7a36-4fff-902c-a554d1c78a4f}', # 3.2a1 - '3.2.102' :'{79ff73b7-8359-410f-b9c5-152d2026f8c8}', # 3.2a2 - '3.2.103' :'{e7635c65-c221-4b9b-b70a-5611b8369d77}', # 3.2a3 - '3.2.104' :'{748cd139-75b8-4ca8-98a7-58262298181e}', # 3.2a4 - '3.2.111' :'{20bfc16f-c7cd-4fc0-8f96-9914614a3c50}', # 3.2b1 - '3.2.112' :'{0e350c98-8d73-4993-b686-cfe87160046e}', # 3.2b2 - '3.2.121' :'{2094968d-7583-47f6-a7fd-22304532e09f}', # 3.2rc1 - '3.2.122' :'{4f3edfa6-cf70-469a-825f-e1206aa7f412}', # 3.2rc2 - '3.2.123' :'{90c673d7-8cfd-4969-9816-f7d70bad87f3}', # 3.2rc3 - '3.2.150' :'{b2042d5e-986d-44ec-aee3-afe4108ccc93}', # 3.2.0 - '3.2.1121':'{4f90de4a-83dd-4443-b625-ca130ff361dd}', # 3.2.1rc1 - '3.2.1122':'{dc5eb04d-ff8a-4bed-8f96-23942fd59e5f}', # 3.2.1rc2 - '3.2.1150':'{34b2530c-6349-4292-9dc3-60bda4aed93c}', # 3.2.1 - '3.2.2121':'{DFB29A53-ACC4-44e6-85A6-D0DA26FE8E4E}', # 3.2.2rc1 - '3.2.2150':'{4CDE3168-D060-4b7c-BC74-4D8F9BB01AFD}', # 3.2.2 - '3.2.3121':'{B8E8CFF7-E4C6-4a7c-9F06-BB3A8B75DDA8}', # 3.2.3rc1 - '3.2.3150':'{789C9644-9F82-44d3-B4CA-AC31F46F5882}', # 3.2.3 - -} diff --git a/Tools/pybench/pybench.py b/Tools/pybench/pybench.py index 8eaad63..cc1e55c 100755 --- a/Tools/pybench/pybench.py +++ b/Tools/pybench/pybench.py @@ -107,6 +107,7 @@ def get_machine_details(): print('Getting machine details...') buildno, builddate = platform.python_build() python = platform.python_version() + # XXX this is now always UCS4, maybe replace it with 'PEP393' in 3.3+? if sys.maxunicode == 65535: # UCS2 build (standard) unitype = 'UCS2' diff --git a/Tools/scripts/README b/Tools/scripts/README index 8c02529..eb28a9e 100644 --- a/Tools/scripts/README +++ b/Tools/scripts/README @@ -15,7 +15,7 @@ db2pickle.py Dump a database file to a pickle diff.py Print file diffs in context, unified, or ndiff formats dutree.py Format du(1) output as a tree sorted by size eptags.py Create Emacs TAGS file for Python modules -find_recursionlimit.py Find the maximum recursion limit on this machine +find_recursionlimit.py Find the maximum recursion limit on this machine finddiv.py A grep-like tool that looks for division operators findlinksto.py Recursively find symbolic links to a given path prefix findnocoding.py Find source files which need an encoding declaration @@ -53,6 +53,7 @@ redemo.py Basic regular expression demonstration facility reindent.py Change .py files to use 4-space indents reindent-rst.py Fix-up reStructuredText file whitespace rgrep.py Reverse grep through a file (useful for big logfiles) +run_tests.py Run the test suite with more sensible default options serve.py Small wsgiref-based web server, used in make serve in Doc suff.py Sort a list of files by suffix svneol.py Set svn:eol-style on all files in directory diff --git a/Tools/scripts/findnocoding.py b/Tools/scripts/findnocoding.py index 77607ce..c42fa7c 100755 --- a/Tools/scripts/findnocoding.py +++ b/Tools/scripts/findnocoding.py @@ -2,7 +2,7 @@ """List all those Python files that require a coding directive -Usage: nocoding.py dir1 [dir2...] +Usage: findnocoding.py dir1 [dir2...] """ __author__ = "Oleg Broytmann, Georg Brandl" @@ -50,7 +50,7 @@ def has_correct_encoding(text, codec): def needs_declaration(fullpath): try: - infile = open(fullpath, 'rU') + infile = open(fullpath) except IOError: # Oops, the file was removed - ignore it return None diff --git a/Tools/scripts/patchcheck.py b/Tools/scripts/patchcheck.py index 0e18dd9..503c67a 100755 --- a/Tools/scripts/patchcheck.py +++ b/Tools/scripts/patchcheck.py @@ -49,29 +49,15 @@ def mq_patches_applied(): @status("Getting the list of files that have been added/changed", info=lambda x: n_files_str(len(x))) def changed_files(): - """Get the list of changed or added files from the VCS.""" - if os.path.isdir(os.path.join(SRCDIR, '.hg')): - vcs = 'hg' - cmd = 'hg status --added --modified --no-status' - if mq_patches_applied(): - cmd += ' --rev qparent' - elif os.path.isdir('.svn'): - vcs = 'svn' - cmd = 'svn status --quiet --non-interactive --ignore-externals' - else: + """Get the list of changed or added files from Mercurial.""" + if not os.path.isdir(os.path.join(SRCDIR, '.hg')): sys.exit('need a checkout to get modified files') - st = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE) - try: - st.wait() - if vcs == 'hg': - return [x.decode().rstrip() for x in st.stdout] - else: - output = (x.decode().rstrip().rsplit(None, 1)[-1] - for x in st.stdout if x[0] in b'AM') - return set(path for path in output if os.path.isfile(path)) - finally: - st.stdout.close() + cmd = 'hg status --added --modified --no-status' + if mq_patches_applied(): + cmd += ' --rev qparent' + with subprocess.Popen(cmd.split(), stdout=subprocess.PIPE) as st: + return [x.decode().rstrip() for x in st.stdout] def report_modified_files(file_paths): @@ -89,10 +75,8 @@ def report_modified_files(file_paths): def normalize_whitespace(file_paths): """Make sure that the whitespace for .py files have been normalized.""" reindent.makebackup = False # No need to create backups. - fixed = [] - for path in (x for x in file_paths if x.endswith('.py')): - if reindent.check(os.path.join(SRCDIR, path)): - fixed.append(path) + fixed = [path for path in file_paths if path.endswith('.py') and + reindent.check(os.path.join(SRCDIR, path))] return fixed @@ -148,6 +132,21 @@ def reported_news(file_paths): """Check if Misc/NEWS has been changed.""" return 'Misc/NEWS' in file_paths +@status("configure regenerated", modal=True, info=str) +def regenerated_configure(file_paths): + """Check if configure has been regenerated.""" + if 'configure.ac' in file_paths: + return "yes" if 'configure' in file_paths else "no" + else: + return "not needed" + +@status("pyconfig.h.in regenerated", modal=True, info=str) +def regenerated_pyconfig_h_in(file_paths): + """Check if pyconfig.h.in has been regenerated.""" + if 'configure.ac' in file_paths: + return "yes" if 'pyconfig.h.in' in file_paths else "no" + else: + return "not needed" def main(): file_paths = changed_files() @@ -167,6 +166,10 @@ def main(): credit_given(special_files) # Misc/NEWS changed. reported_news(special_files) + # Regenerated configure, if necessary. + regenerated_configure(file_paths) + # Regenerated pyconfig.h.in, if necessary. + regenerated_pyconfig_h_in(file_paths) # Test suite run and passed. if python_files or c_files: diff --git a/Tools/scripts/pysetup3 b/Tools/scripts/pysetup3 new file mode 100755 index 0000000..e6a908d --- /dev/null +++ b/Tools/scripts/pysetup3 @@ -0,0 +1,4 @@ +#!/usr/bin/env python3 +import sys +from packaging.run import main +sys.exit(main()) diff --git a/Tools/scripts/pysource.py b/Tools/scripts/pysource.py index 048131e..c7dbe60 100755 --- a/Tools/scripts/pysource.py +++ b/Tools/scripts/pysource.py @@ -42,7 +42,7 @@ def _open(fullpath): return None try: - return open(fullpath, 'rU') + return open(fullpath) except IOError as err: # Access denied, or a special file - ignore it print_debug("%s: access denied: %s" % (fullpath, err)) return None diff --git a/Tools/scripts/reindent.py b/Tools/scripts/reindent.py index b18993b..4a916ea 100755 --- a/Tools/scripts/reindent.py +++ b/Tools/scripts/reindent.py @@ -8,6 +8,8 @@ -r (--recurse) Recurse. Search for all .py files in subdirectories too. -n (--nobackup) No backup. Does not make a ".bak" file before reindenting. -v (--verbose) Verbose. Print informative msgs; else no output. + (--newline) Newline. Specify the newline character to use (CRLF, LF). + Default is the same as the original file. -h (--help) Help. Print this usage information and exit. Change Python (.py) files to use 4-space indents and no hard tab characters. @@ -50,6 +52,8 @@ verbose = False recurse = False dryrun = False makebackup = True +spec_newline = None +"""A specified newline to be used in the output (set by --newline option)""" def usage(msg=None): @@ -62,13 +66,12 @@ def errprint(*args): sys.stderr.write(" ".join(str(arg) for arg in args)) sys.stderr.write("\n") - def main(): import getopt - global verbose, recurse, dryrun, makebackup + global verbose, recurse, dryrun, makebackup, spec_newline try: opts, args = getopt.getopt(sys.argv[1:], "drnvh", - ["dryrun", "recurse", "nobackup", "verbose", "help"]) + ["dryrun", "recurse", "nobackup", "verbose", "newline=", "help"]) except getopt.error as msg: usage(msg) return @@ -81,6 +84,11 @@ def main(): makebackup = False elif o in ('-v', '--verbose'): verbose = True + elif o in ('--newline',): + if not a.upper() in ('CRLF', 'LF'): + usage() + return + spec_newline = dict(CRLF='\r\n', LF='\n')[a.upper()] elif o in ('-h', '--help'): usage() return @@ -118,9 +126,9 @@ def check(file): errprint("%s: I/O Error: %s" % (file, str(msg))) return - newline = r.newlines + newline = spec_newline if spec_newline else r.newlines if isinstance(newline, tuple): - errprint("%s: mixed newlines detected; cannot process file" % file) + errprint("%s: mixed newlines detected; cannot continue without --newline" % file) return if r.run(): diff --git a/Tools/scripts/run_tests.py b/Tools/scripts/run_tests.py new file mode 100755 index 0000000..f750e19 --- /dev/null +++ b/Tools/scripts/run_tests.py @@ -0,0 +1,47 @@ +"""Run Python's test suite in a fast, rigorous way. + +The defaults are meant to be reasonably thorough, while skipping certain +tests that can be time-consuming or resource-intensive (e.g. largefile), +or distracting (e.g. audio and gui). These defaults can be overridden by +simply passing a -u option to this script. + +""" + +import os +import sys +import test.support + + +def is_multiprocess_flag(arg): + return arg.startswith('-j') or arg.startswith('--multiprocess') + + +def is_resource_use_flag(arg): + return arg.startswith('-u') or arg.startswith('--use') + + +def main(regrtest_args): + args = [sys.executable, + '-W', 'default', # Warnings set to 'default' + '-bb', # Warnings about bytes/bytearray + '-E', # Ignore environment variables + ] + # Allow user-specified interpreter options to override our defaults. + args.extend(test.support.args_from_interpreter_flags()) + args.extend(['-m', 'test', # Run the test suite + '-r', # Randomize test order + '-w', # Re-run failed tests in verbose mode + ]) + if sys.platform == 'win32': + args.append('-n') # Silence alerts under Windows + if not any(is_multiprocess_flag(arg) for arg in regrtest_args): + args.extend(['-j', '0']) # Use all CPU cores + if not any(is_resource_use_flag(arg) for arg in regrtest_args): + args.extend(['-u', 'all,-largefile,-audio,-gui']) + args.extend(regrtest_args) + print(' '.join(args)) + os.execv(sys.executable, args) + + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/Tools/unicode/comparecodecs.py b/Tools/unicode/comparecodecs.py index 0f5c1e2..7de14fd 100644 --- a/Tools/unicode/comparecodecs.py +++ b/Tools/unicode/comparecodecs.py @@ -14,7 +14,7 @@ def compare_codecs(encoding1, encoding2): print('Comparing encoding/decoding of %r and %r' % (encoding1, encoding2)) mismatch = 0 # Check encoding - for i in range(sys.maxunicode): + for i in range(sys.maxunicode+1): u = chr(i) try: c1 = u.encode(encoding1) diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py index d503190..db0f8ec 100644 --- a/Tools/unicode/makeunicodedata.py +++ b/Tools/unicode/makeunicodedata.py @@ -21,17 +21,24 @@ # 2004-05-29 perky add east asian width information # 2006-03-10 mvl update to Unicode 4.1; add UCD 3.2 delta # 2008-06-11 gb add PRINTABLE_MASK for Atsuo Ishimoto's ascii() patch +# 2011-10-21 ezio add support for name aliases and named sequences +# 2012-01 benjamin add full case mappings # # written by Fredrik Lundh (fredrik@pythonware.com) # -import sys, os, zipfile +import os +import sys +import zipfile + +from textwrap import dedent +from operator import itemgetter SCRIPT = sys.argv[0] VERSION = "3.2" # The Unicode Database -UNIDATA_VERSION = "6.0.0" +UNIDATA_VERSION = "6.1.0" UNICODE_DATA = "UnicodeData%s.txt" COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt" EASTASIAN_WIDTH = "EastAsianWidth%s.txt" @@ -39,6 +46,19 @@ UNIHAN = "Unihan%s.zip" DERIVED_CORE_PROPERTIES = "DerivedCoreProperties%s.txt" DERIVEDNORMALIZATION_PROPS = "DerivedNormalizationProps%s.txt" LINE_BREAK = "LineBreak%s.txt" +NAME_ALIASES = "NameAliases%s.txt" +NAMED_SEQUENCES = "NamedSequences%s.txt" +SPECIAL_CASING = "SpecialCasing%s.txt" +CASE_FOLDING = "CaseFolding%s.txt" + +# Private Use Areas -- in planes 1, 15, 16 +PUA_1 = range(0xE000, 0xF900) +PUA_15 = range(0xF0000, 0xFFFFE) +PUA_16 = range(0x100000, 0x10FFFE) + +# we use this ranges of PUA_15 to store name aliases and named sequences +NAME_ALIASES_START = 0xF0000 +NAMED_SEQUENCES_START = 0xF0200 old_versions = ["3.2.0"] @@ -67,13 +87,15 @@ UPPER_MASK = 0x80 XID_START_MASK = 0x100 XID_CONTINUE_MASK = 0x200 PRINTABLE_MASK = 0x400 -NODELTA_MASK = 0x800 -NUMERIC_MASK = 0x1000 +NUMERIC_MASK = 0x800 +CASE_IGNORABLE_MASK = 0x1000 +CASED_MASK = 0x2000 +EXTENDED_CASE_MASK = 0x4000 # these ranges need to match unicodedata.c:is_unified_ideograph cjk_ranges = [ ('3400', '4DB5'), - ('4E00', '9FCB'), + ('4E00', '9FCC'), ('20000', '2A6D6'), ('2A700', '2B734'), ('2B740', '2B81D') @@ -367,6 +389,7 @@ def makeunicodetype(unicode, trace): numeric = {} spaces = [] linebreaks = [] + extra_casing = [] for char in unicode.chars: record = unicode.table[char] @@ -379,7 +402,7 @@ def makeunicodetype(unicode, trace): delta = True if category in ["Lm", "Lt", "Lu", "Ll", "Lo"]: flags |= ALPHA_MASK - if category == "Ll": + if "Lowercase" in properties: flags |= LOWER_MASK if 'Line_Break' in properties or bidirectional == "B": flags |= LINEBREAK_MASK @@ -389,7 +412,7 @@ def makeunicodetype(unicode, trace): spaces.append(char) if category == "Lt": flags |= TITLE_MASK - if category == "Lu": + if "Uppercase" in properties: flags |= UPPER_MASK if char == ord(" ") or category[0] not in ("C", "Z"): flags |= PRINTABLE_MASK @@ -397,7 +420,12 @@ def makeunicodetype(unicode, trace): flags |= XID_START_MASK if "XID_Continue" in properties: flags |= XID_CONTINUE_MASK - # use delta predictor for upper/lower/title if it fits + if "Cased" in properties: + flags |= CASED_MASK + if "Case_Ignorable" in properties: + flags |= CASE_IGNORABLE_MASK + sc = unicode.special_casing.get(char) + cf = unicode.case_folding.get(char, [char]) if record[12]: upper = int(record[12], 16) else: @@ -409,23 +437,39 @@ def makeunicodetype(unicode, trace): if record[14]: title = int(record[14], 16) else: - # UCD.html says that a missing title char means that - # it defaults to the uppercase character, not to the - # character itself. Apparently, in the current UCD (5.x) - # this feature is never used title = upper - upper_d = upper - char - lower_d = lower - char - title_d = title - char - if -32768 <= upper_d <= 32767 and \ - -32768 <= lower_d <= 32767 and \ - -32768 <= title_d <= 32767: - # use deltas - upper = upper_d & 0xffff - lower = lower_d & 0xffff - title = title_d & 0xffff + if sc is None and cf != [lower]: + sc = ([lower], [title], [upper]) + if sc is None: + if upper == lower == title: + upper = lower = title = 0 + else: + upper = upper - char + lower = lower - char + title = title - char + assert (abs(upper) <= 2147483647 and + abs(lower) <= 2147483647 and + abs(title) <= 2147483647) else: - flags |= NODELTA_MASK + # This happens either when some character maps to more than one + # character in uppercase, lowercase, or titlecase or the + # casefolded version of the character is different from the + # lowercase. The extra characters are stored in a different + # array. + flags |= EXTENDED_CASE_MASK + lower = len(extra_casing) | (len(sc[0]) << 24) + extra_casing.extend(sc[0]) + if cf != sc[0]: + lower |= len(cf) << 20 + extra_casing.extend(cf) + upper = len(extra_casing) | (len(sc[2]) << 24) + extra_casing.extend(sc[2]) + # Title is probably equal to upper. + if sc[1] == sc[2]: + title = upper + else: + title = len(extra_casing) | (len(sc[1]) << 24) + extra_casing.extend(sc[1]) # decimal digit, integer digit decimal = 0 if record[6]: @@ -452,6 +496,7 @@ def makeunicodetype(unicode, trace): print(sum(map(len, numeric.values())), "numeric code points") print(len(spaces), "whitespace code points") print(len(linebreaks), "linebreak code points") + print(len(extra_casing), "extended case array") print("--- Writing", FILE, "...") @@ -465,6 +510,14 @@ def makeunicodetype(unicode, trace): print("};", file=fp) print(file=fp) + print("/* extended case mappings */", file=fp) + print(file=fp) + print("const Py_UCS4 _PyUnicode_ExtendedCase[] = {", file=fp) + for c in extra_casing: + print(" %d," % c, file=fp) + print("};", file=fp) + print(file=fp) + # split decomposition index table index1, index2, shift = splitbins(index, trace) @@ -692,6 +745,39 @@ def makeunicodename(unicode, trace): print("/* name->code dictionary */", file=fp) codehash.dump(fp, trace) + print(file=fp) + print('static const unsigned int aliases_start = %#x;' % + NAME_ALIASES_START, file=fp) + print('static const unsigned int aliases_end = %#x;' % + (NAME_ALIASES_START + len(unicode.aliases)), file=fp) + + print('static const unsigned int name_aliases[] = {', file=fp) + for name, codepoint in unicode.aliases: + print(' 0x%04X,' % codepoint, file=fp) + print('};', file=fp) + + # In Unicode 6.0.0, the sequences contain at most 4 BMP chars, + # so we are using Py_UCS2 seq[4]. This needs to be updated if longer + # sequences or sequences with non-BMP chars are added. + # unicodedata_lookup should be adapted too. + print(dedent(""" + typedef struct NamedSequence { + int seqlen; + Py_UCS2 seq[4]; + } named_sequence; + """), file=fp) + + print('static const unsigned int named_sequences_start = %#x;' % + NAMED_SEQUENCES_START, file=fp) + print('static const unsigned int named_sequences_end = %#x;' % + (NAMED_SEQUENCES_START + len(unicode.named_sequences)), file=fp) + + print('static const named_sequence named_sequences[] = {', file=fp) + for name, sequence in unicode.named_sequences: + seq_str = ', '.join('0x%04X' % cp for cp in sequence) + print(' {%d, {%s}},' % (len(sequence), seq_str), file=fp) + print('};', file=fp) + fp.close() @@ -726,7 +812,11 @@ def merge_old_version(version, new, old): for k in range(len(old.table[i])): if old.table[i][k] != new.table[i][k]: value = old.table[i][k] - if k == 2: + if k == 1 and i in PUA_15: + # the name is not set in the old.table, but in the + # new.table we are using it for aliases and named seq + assert value == '' + elif k == 2: #print "CATEGORY",hex(i), old.table[i][k], new.table[i][k] category_changes[i] = CATEGORY_NAMES.index(value) elif k == 4: @@ -816,15 +906,15 @@ class UnicodeData: expand=1, cjk_check=True): self.changed = [] - file = open_data(UNICODE_DATA, version) table = [None] * 0x110000 - while 1: - s = file.readline() - if not s: - break - s = s.strip().split(";") - char = int(s[0], 16) - table[char] = s + with open_data(UNICODE_DATA, version) as file: + while 1: + s = file.readline() + if not s: + break + s = s.strip().split(";") + char = int(s[0], 16) + table[char] = s cjk_ranges_found = [] @@ -855,32 +945,79 @@ class UnicodeData: self.table = table self.chars = list(range(0x110000)) # unicode 3.2 - file = open_data(COMPOSITION_EXCLUSIONS, version) + # check for name aliases and named sequences, see #12753 + # aliases and named sequences are not in 3.2.0 + if version != '3.2.0': + self.aliases = [] + # store aliases in the Private Use Area 15, in range U+F0000..U+F00FF, + # in order to take advantage of the compression and lookup + # algorithms used for the other characters + pua_index = NAME_ALIASES_START + with open_data(NAME_ALIASES, version) as file: + for s in file: + s = s.strip() + if not s or s.startswith('#'): + continue + char, name, abbrev = s.split(';') + char = int(char, 16) + self.aliases.append((name, char)) + # also store the name in the PUA 1 + self.table[pua_index][1] = name + pua_index += 1 + assert pua_index - NAME_ALIASES_START == len(self.aliases) + + self.named_sequences = [] + # store named seqences in the PUA 1, in range U+F0100.., + # in order to take advantage of the compression and lookup + # algorithms used for the other characters. + + assert pua_index < NAMED_SEQUENCES_START + pua_index = NAMED_SEQUENCES_START + with open_data(NAMED_SEQUENCES, version) as file: + for s in file: + s = s.strip() + if not s or s.startswith('#'): + continue + name, chars = s.split(';') + chars = tuple(int(char, 16) for char in chars.split()) + # check that the structure defined in makeunicodename is OK + assert 2 <= len(chars) <= 4, "change the Py_UCS2 array size" + assert all(c <= 0xFFFF for c in chars), ("use Py_UCS4 in " + "the NamedSequence struct and in unicodedata_lookup") + self.named_sequences.append((name, chars)) + # also store these in the PUA 1 + self.table[pua_index][1] = name + pua_index += 1 + assert pua_index - NAMED_SEQUENCES_START == len(self.named_sequences) + self.exclusions = {} - for s in file: - s = s.strip() - if not s: - continue - if s[0] == '#': - continue - char = int(s.split()[0],16) - self.exclusions[char] = 1 + with open_data(COMPOSITION_EXCLUSIONS, version) as file: + for s in file: + s = s.strip() + if not s: + continue + if s[0] == '#': + continue + char = int(s.split()[0],16) + self.exclusions[char] = 1 widths = [None] * 0x110000 - for s in open_data(EASTASIAN_WIDTH, version): - s = s.strip() - if not s: - continue - if s[0] == '#': - continue - s = s.split()[0].split(';') - if '..' in s[0]: - first, last = [int(c, 16) for c in s[0].split('..')] - chars = list(range(first, last+1)) - else: - chars = [int(s[0], 16)] - for char in chars: - widths[char] = s[1] + with open_data(EASTASIAN_WIDTH, version) as file: + for s in file: + s = s.strip() + if not s: + continue + if s[0] == '#': + continue + s = s.split()[0].split(';') + if '..' in s[0]: + first, last = [int(c, 16) for c in s[0].split('..')] + chars = list(range(first, last+1)) + else: + chars = [int(s[0], 16)] + for char in chars: + widths[char] = s[1] + for i in range(0, 0x110000): if table[i] is not None: table[i].append(widths[i]) @@ -888,36 +1025,39 @@ class UnicodeData: for i in range(0, 0x110000): if table[i] is not None: table[i].append(set()) - for s in open_data(DERIVED_CORE_PROPERTIES, version): - s = s.split('#', 1)[0].strip() - if not s: - continue - r, p = s.split(";") - r = r.strip() - p = p.strip() - if ".." in r: - first, last = [int(c, 16) for c in r.split('..')] - chars = list(range(first, last+1)) - else: - chars = [int(r, 16)] - for char in chars: - if table[char]: - # Some properties (e.g. Default_Ignorable_Code_Point) - # apply to unassigned code points; ignore them - table[char][-1].add(p) - - for s in open_data(LINE_BREAK, version): - s = s.partition('#')[0] - s = [i.strip() for i in s.split(';')] - if len(s) < 2 or s[1] not in MANDATORY_LINE_BREAKS: - continue - if '..' not in s[0]: - first = last = int(s[0], 16) - else: - first, last = [int(c, 16) for c in s[0].split('..')] - for char in range(first, last+1): - table[char][-1].add('Line_Break') + with open_data(DERIVED_CORE_PROPERTIES, version) as file: + for s in file: + s = s.split('#', 1)[0].strip() + if not s: + continue + + r, p = s.split(";") + r = r.strip() + p = p.strip() + if ".." in r: + first, last = [int(c, 16) for c in r.split('..')] + chars = list(range(first, last+1)) + else: + chars = [int(r, 16)] + for char in chars: + if table[char]: + # Some properties (e.g. Default_Ignorable_Code_Point) + # apply to unassigned code points; ignore them + table[char][-1].add(p) + + with open_data(LINE_BREAK, version) as file: + for s in file: + s = s.partition('#')[0] + s = [i.strip() for i in s.split(';')] + if len(s) < 2 or s[1] not in MANDATORY_LINE_BREAKS: + continue + if '..' not in s[0]: + first = last = int(s[0], 16) + else: + first, last = [int(c, 16) for c in s[0].split('..')] + for char in range(first, last+1): + table[char][-1].add('Line_Break') # We only want the quickcheck properties # Format: NF?_QC; Y(es)/N(o)/M(aybe) @@ -928,31 +1068,33 @@ class UnicodeData: # for older versions, and no delta records will be created. quickchecks = [0] * 0x110000 qc_order = 'NFD_QC NFKD_QC NFC_QC NFKC_QC'.split() - for s in open_data(DERIVEDNORMALIZATION_PROPS, version): - if '#' in s: - s = s[:s.index('#')] - s = [i.strip() for i in s.split(';')] - if len(s) < 2 or s[1] not in qc_order: - continue - quickcheck = 'MN'.index(s[2]) + 1 # Maybe or No - quickcheck_shift = qc_order.index(s[1])*2 - quickcheck <<= quickcheck_shift - if '..' not in s[0]: - first = last = int(s[0], 16) - else: - first, last = [int(c, 16) for c in s[0].split('..')] - for char in range(first, last+1): - assert not (quickchecks[char]>>quickcheck_shift)&3 - quickchecks[char] |= quickcheck + with open_data(DERIVEDNORMALIZATION_PROPS, version) as file: + for s in file: + if '#' in s: + s = s[:s.index('#')] + s = [i.strip() for i in s.split(';')] + if len(s) < 2 or s[1] not in qc_order: + continue + quickcheck = 'MN'.index(s[2]) + 1 # Maybe or No + quickcheck_shift = qc_order.index(s[1])*2 + quickcheck <<= quickcheck_shift + if '..' not in s[0]: + first = last = int(s[0], 16) + else: + first, last = [int(c, 16) for c in s[0].split('..')] + for char in range(first, last+1): + assert not (quickchecks[char]>>quickcheck_shift)&3 + quickchecks[char] |= quickcheck for i in range(0, 0x110000): if table[i] is not None: table[i].append(quickchecks[i]) - zip = zipfile.ZipFile(open_data(UNIHAN, version)) - if version == '3.2.0': - data = zip.open('Unihan-3.2.0.txt').read() - else: - data = zip.open('Unihan_NumericValues.txt').read() + with open_data(UNIHAN, version) as file: + zip = zipfile.ZipFile(file) + if version == '3.2.0': + data = zip.open('Unihan-3.2.0.txt').read() + else: + data = zip.open('Unihan_NumericValues.txt').read() for line in data.decode("utf-8").splitlines(): if not line.startswith('U+'): continue @@ -965,6 +1107,34 @@ class UnicodeData: # Patch the numeric field if table[i] is not None: table[i][8] = value + sc = self.special_casing = {} + with open_data(SPECIAL_CASING, version) as file: + for s in file: + s = s[:-1].split('#', 1)[0] + if not s: + continue + data = s.split("; ") + if data[4]: + # We ignore all conditionals (since they depend on + # languages) except for one, which is hardcoded. See + # handle_capital_sigma in unicodeobject.c. + continue + c = int(data[0], 16) + lower = [int(char, 16) for char in data[1].split()] + title = [int(char, 16) for char in data[2].split()] + upper = [int(char, 16) for char in data[3].split()] + sc[c] = (lower, title, upper) + cf = self.case_folding = {} + if version != '3.2.0': + with open_data(CASE_FOLDING, version) as file: + for s in file: + s = s[:-1].split('#', 1)[0] + if not s: + continue + data = s.split("; ") + if data[1] in "CF": + c = int(data[0], 16) + cf[c] = [int(char, 16) for char in data[2].split()] def uselatin1(self): # restrict character range to ISO Latin 1 diff --git a/Tools/unittestgui/unittestgui.py b/Tools/unittestgui/unittestgui.py index b526646..09a20e2 100644 --- a/Tools/unittestgui/unittestgui.py +++ b/Tools/unittestgui/unittestgui.py @@ -28,7 +28,6 @@ SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. """ __author__ = "Steve Purcell (stephen_purcell@yahoo.com)" -__version__ = "$Revision: 1.7 $"[11:-2] import sys import traceback |