diff options
Diffstat (limited to 'Tools')
44 files changed, 6103 insertions, 219 deletions
diff --git a/Tools/buildbot/external-amd64.bat b/Tools/buildbot/external-amd64.bat index d2ff255..4c3b67b 100644 --- a/Tools/buildbot/external-amd64.bat +++ b/Tools/buildbot/external-amd64.bat @@ -4,18 +4,18 @@ call "Tools\buildbot\external-common.bat" call "%VS100COMNTOOLS%\..\..\VC\vcvarsall.bat" x86_amd64 -if not exist tcltk64\bin\tcl85g.dll ( - cd tcl-8.5.11.0\win - nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 clean all - nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 install +if not exist tcltk64\bin\tcl86tg.dll ( + cd tcl-8.6.1.0\win + nmake -f makefile.vc DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 clean all + nmake -f makefile.vc DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 install cd ..\.. ) -if not exist tcltk64\bin\tk85g.dll ( - cd tk-8.5.11.0\win - nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 TCLDIR=..\..\tcl-8.5.11.0 clean - nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 TCLDIR=..\..\tcl-8.5.11.0 all - nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 TCLDIR=..\..\tcl-8.5.11.0 install +if not exist tcltk64\bin\tk86tg.dll ( + cd tk-8.6.1.0\win + nmake -f makefile.vc OPTS=noxp DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 TCLDIR=..\..\tcl-8.6.1.0 clean + nmake -f makefile.vc OPTS=noxp DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 TCLDIR=..\..\tcl-8.6.1.0 all + nmake -f makefile.vc OPTS=noxp DEBUG=1 MACHINE=AMD64 INSTALLDIR=..\..\tcltk64 TCLDIR=..\..\tcl-8.6.1.0 install cd ..\.. 
) diff --git a/Tools/buildbot/external-common.bat b/Tools/buildbot/external-common.bat index c6278b2..244abd2 100644 --- a/Tools/buildbot/external-common.bat +++ b/Tools/buildbot/external-common.bat @@ -30,19 +30,20 @@ if not exist openssl-1.0.1e ( ) @rem tcl/tk -if not exist tcl-8.5.11.0 ( - rd /s/q tcltk tcltk64 - svn export http://svn.python.org/projects/external/tcl-8.5.11.0 +if not exist tcl-8.6.1.0 ( + rd /s/q tcltk tcltk64 tcl-8.5.11.0 tk-8.5.11.0 + svn export http://svn.python.org/projects/external/tcl-8.6.1.0 ) -if not exist tk-8.5.11.0 svn export http://svn.python.org/projects/external/tk-8.5.11.0 +if not exist tk-8.6.1.0 svn export http://svn.python.org/projects/external/tk-8.6.1.0 @rem sqlite3 -if not exist sqlite-3.7.12 ( - rd /s/q sqlite-source-3.7.4 - svn export http://svn.python.org/projects/external/sqlite-3.7.12 +if not exist sqlite-3.8.1 ( + rd /s/q sqlite-source-3.7.12 + svn export http://svn.python.org/projects/external/sqlite-3.8.1 ) @rem lzma -if not exist xz-5.0.3 ( - svn export http://svn.python.org/projects/external/xz-5.0.3 +if not exist xz-5.0.5 ( + rd /s/q xz-5.0.3 + svn export http://svn.python.org/projects/external/xz-5.0.5 ) diff --git a/Tools/buildbot/external.bat b/Tools/buildbot/external.bat index ed5c10e..c580a14 100644 --- a/Tools/buildbot/external.bat +++ b/Tools/buildbot/external.bat @@ -4,18 +4,18 @@ call "Tools\buildbot\external-common.bat" call "%VS100COMNTOOLS%\vsvars32.bat" -if not exist tcltk\bin\tcl85g.dll ( +if not exist tcltk\bin\tcl86tg.dll ( @rem all and install need to be separate invocations, otherwise nmakehlp is not found on install - cd tcl-8.5.11.0\win - nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 DEBUG=1 INSTALLDIR=..\..\tcltk clean all + cd tcl-8.6.1.0\win + nmake -f makefile.vc DEBUG=1 INSTALLDIR=..\..\tcltk clean all nmake -f makefile.vc DEBUG=1 INSTALLDIR=..\..\tcltk install cd ..\.. 
) -if not exist tcltk\bin\tk85g.dll ( - cd tk-8.5.11.0\win - nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 INSTALLDIR=..\..\tcltk TCLDIR=..\..\tcl-8.5.11.0 clean - nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 INSTALLDIR=..\..\tcltk TCLDIR=..\..\tcl-8.5.11.0 all - nmake -f makefile.vc COMPILERFLAGS=-DWINVER=0x0500 OPTS=noxp DEBUG=1 INSTALLDIR=..\..\tcltk TCLDIR=..\..\tcl-8.5.11.0 install +if not exist tcltk\bin\tk86tg.dll ( + cd tk-8.6.1.0\win + nmake -f makefile.vc OPTS=noxp DEBUG=1 INSTALLDIR=..\..\tcltk TCLDIR=..\..\tcl-8.6.1.0 clean + nmake -f makefile.vc OPTS=noxp DEBUG=1 INSTALLDIR=..\..\tcltk TCLDIR=..\..\tcl-8.6.1.0 all + nmake -f makefile.vc OPTS=noxp DEBUG=1 INSTALLDIR=..\..\tcltk TCLDIR=..\..\tcl-8.6.1.0 install cd ..\.. ) diff --git a/Tools/buildbot/test-amd64.bat b/Tools/buildbot/test-amd64.bat index 1bf124c..de64f25 100644 --- a/Tools/buildbot/test-amd64.bat +++ b/Tools/buildbot/test-amd64.bat @@ -1,3 +1,3 @@ @rem Used by the buildbot "test" step. cd PCbuild -call rt.bat -d -q -x64 -uall -rwW -n %1 %2 %3 %4 %5 %6 %7 %8 %9 +call rt.bat -d -q -x64 -uall -rwW -n --timeout=3600 %1 %2 %3 %4 %5 %6 %7 %8 %9 diff --git a/Tools/buildbot/test.bat b/Tools/buildbot/test.bat index 4f4c6aa..4e4db10 100644 --- a/Tools/buildbot/test.bat +++ b/Tools/buildbot/test.bat @@ -1,3 +1,3 @@ @rem Used by the buildbot "test" step. cd PCbuild -call rt.bat -d -q -uall -rwW -n %1 %2 %3 %4 %5 %6 %7 %8 %9 +call rt.bat -d -q -uall -rwW -n --timeout=3600 %1 %2 %3 %4 %5 %6 %7 %8 %9 diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py new file mode 100755 index 0000000..68a1436 --- /dev/null +++ b/Tools/clinic/clinic.py @@ -0,0 +1,4199 @@ +#!/usr/bin/env python3 +# +# Argument Clinic +# Copyright 2012-2013 by Larry Hastings. +# Licensed to the PSF under a contributor agreement. 
+# + +import abc +import ast +import atexit +import collections +import contextlib +import copy +import cpp +import functools +import hashlib +import inspect +import io +import itertools +import os +import pprint +import re +import shlex +import string +import sys +import tempfile +import textwrap +import traceback +import uuid + +# TODO: +# +# soon: +# +# * allow mixing any two of {positional-only, positional-or-keyword, +# keyword-only} +# * dict constructor uses positional-only and keyword-only +# * max and min use positional only with an optional group +# and keyword-only +# + +version = '1' + +_empty = inspect._empty +_void = inspect._void + +NoneType = type(None) + +class Unspecified: + def __repr__(self): + return '<Unspecified>' + +unspecified = Unspecified() + + +class Null: + def __repr__(self): + return '<Null>' + +NULL = Null() + + +class Unknown: + def __repr__(self): + return '<Unknown>' + +unknown = Unknown() + + +def _text_accumulator(): + text = [] + def output(): + s = ''.join(text) + text.clear() + return s + return text, text.append, output + + +def text_accumulator(): + """ + Creates a simple text accumulator / joiner. + + Returns a pair of callables: + append, output + "append" appends a string to the accumulator. + "output" returns the contents of the accumulator + joined together (''.join(accumulator)) and + empties the accumulator. 
+ """ + text, append, output = _text_accumulator() + return append, output + + +def warn_or_fail(fail=False, *args, filename=None, line_number=None): + joined = " ".join([str(a) for a in args]) + add, output = text_accumulator() + if fail: + add("Error") + else: + add("Warning") + if clinic: + if filename is None: + filename = clinic.filename + if getattr(clinic, 'block_parser', None) and (line_number is None): + line_number = clinic.block_parser.line_number + if filename is not None: + add(' in file "' + filename + '"') + if line_number is not None: + add(" on line " + str(line_number)) + add(':\n') + add(joined) + print(output()) + if fail: + sys.exit(-1) + + +def warn(*args, filename=None, line_number=None): + return warn_or_fail(False, *args, filename=filename, line_number=line_number) + +def fail(*args, filename=None, line_number=None): + return warn_or_fail(True, *args, filename=filename, line_number=line_number) + + +def quoted_for_c_string(s): + for old, new in ( + ('\\', '\\\\'), # must be first! + ('"', '\\"'), + ("'", "\\'"), + ): + s = s.replace(old, new) + return s + +def c_repr(s): + return '"' + s + '"' + + +is_legal_c_identifier = re.compile('^[A-Za-z_][A-Za-z0-9_]*$').match + +def is_legal_py_identifier(s): + return all(is_legal_c_identifier(field) for field in s.split('.')) + +# identifiers that are okay in Python but aren't a good idea in C. +# so if they're used Argument Clinic will add "_value" to the end +# of the name in C. 
def linear_format(s, **kwargs):
    """
    Perform str.format-like substitution, one whole line at a time.

    Rules:
      * A substitution marker ("{name}") must be on a line by
        itself.  (This line is the "source line".)
      * If the substitution text is empty, the source line
        is removed from the output.
      * If the field is not recognized (name is not a keyword
        argument), or the line has no complete "{...}" pair,
        the original line is passed unmodified to the output.
      * If the substitution text is not empty:
          * Each line of the substituted text is right-stripped
            and indented by the indent of the source line.
          * A newline is added to the end.

    Parameters:
        s       the template text; split on newlines.
        kwargs  maps marker names to their replacement text.

    Returns the reassembled text, without the final newline.

    A recognized marker with anything other than whitespace in
    front of it, or with any text after it, is reported via fail().
    """
    pieces = []
    for line in s.split('\n'):
        indent, open_curly, rest = line.partition('{')
        name, close_curly, trailing = rest.partition('}')

        # A marker needs BOTH braces.  (The closing brace must be
        # tested here too; testing only the opening one would treat
        # an unterminated "{name" as a complete marker.)
        if not (open_curly and close_curly) or name not in kwargs:
            pieces.append(line)
            pieces.append('\n')
            continue

        if trailing:
            fail("Text found after {" + name + "} block marker! It must be on a line by itself.")
        if indent.strip():
            fail("Non-whitespace characters found before {" + name + "} block marker! It must be on a line by itself.")

        value = kwargs[name]
        if not value:
            # empty substitution: drop the source line entirely
            continue

        pieces.append(textwrap.indent(rstrip_lines(value), indent))
        pieces.append('\n')

    # every emitted line ends with '\n'; trim the final one
    return ''.join(pieces)[:-1]
+ """ + version = [] + accumulator = [] + def flush(): + if not accumulator: + raise ValueError('Unsupported version string: ' + repr(s)) + version.append(int(''.join(accumulator))) + accumulator.clear() + + for c in s: + if c.isdigit(): + accumulator.append(c) + elif c == '.': + flush() + elif c in 'abc': + flush() + version.append('abc'.index(c) - 3) + else: + raise ValueError('Illegal character ' + repr(c) + ' in version string ' + repr(s)) + flush() + return tuple(version) + +def version_comparitor(version1, version2): + iterator = itertools.zip_longest(version_splitter(version1), version_splitter(version2), fillvalue=0) + for i, (a, b) in enumerate(iterator): + if a < b: + return -1 + if a > b: + return 1 + return 0 + + +class CRenderData: + def __init__(self): + + # The C statements to declare variables. + # Should be full lines with \n eol characters. + self.declarations = [] + + # The C statements required to initialize the variables before the parse call. + # Should be full lines with \n eol characters. + self.initializers = [] + + # The C statements needed to dynamically modify the values + # parsed by the parse call, before calling the impl. + self.modifications = [] + + # The entries for the "keywords" array for PyArg_ParseTuple. + # Should be individual strings representing the names. + self.keywords = [] + + # The "format units" for PyArg_ParseTuple. + # Should be individual strings that will get + self.format_units = [] + + # The varargs arguments for PyArg_ParseTuple. + self.parse_arguments = [] + + # The parameter declarations for the impl function. + self.impl_parameters = [] + + # The arguments to the impl function at the time it's called. + self.impl_arguments = [] + + # For return converters: the name of the variable that + # should receive the value returned by the impl. + self.return_value = "return_value" + + # For return converters: the code to convert the return + # value from the parse function. 
This is also where + # you should check the _return_value for errors, and + # "goto exit" if there are any. + self.return_conversion = [] + + # The C statements required to clean up after the impl call. + self.cleanup = [] + + +class FormatCounterFormatter(string.Formatter): + """ + This counts how many instances of each formatter + "replacement string" appear in the format string. + + e.g. after evaluating "string {a}, {b}, {c}, {a}" + the counts dict would now look like + {'a': 2, 'b': 1, 'c': 1} + """ + def __init__(self): + self.counts = collections.Counter() + + def get_value(self, key, args, kwargs): + self.counts[key] += 1 + return '' + +class Language(metaclass=abc.ABCMeta): + + start_line = "" + body_prefix = "" + stop_line = "" + checksum_line = "" + + def __init__(self, filename): + pass + + @abc.abstractmethod + def render(self, clinic, signatures): + pass + + def parse_line(self, line): + pass + + def validate(self): + def assert_only_one(attr, *additional_fields): + """ + Ensures that the string found at getattr(self, attr) + contains exactly one formatter replacement string for + each valid field. The list of valid fields is + ['dsl_name'] extended by additional_fields. + + e.g. 
def permute_left_option_groups(l):
    """
    Yield the cumulative flattenings of the groups in l,
    growing from the right-most group leftwards.

    l must be a sequence of iterables (the option groups).
    Given [[1], [2], [3]], yields:
       ()
       (3,)
       (2, 3)
       (1, 2, 3)
    """
    yield tuple()
    accumulator = []
    for group in reversed(l):
        # prepend, so members keep their left-to-right order
        accumulator = list(group) + accumulator
        yield tuple(accumulator)


def permute_right_option_groups(l):
    """
    Yield the cumulative flattenings of the groups in l,
    growing from the left-most group rightwards.

    l must be an iterable of iterables (the option groups).
    Given [[1], [2], [3]], yields:
       ()
       (1,)
       (1, 2)
       (1, 2, 3)
    """
    yield tuple()
    accumulator = []
    for group in l:
        accumulator.extend(group)
        yield tuple(accumulator)


def permute_optional_groups(left, required, right):
    """
    Compute the set of acceptable argument lists for the
    provided iterables of argument groups.

    Parameters:
        left      sequence of optional groups to the left of
                  the required arguments (outermost first).
        required  iterable of the required arguments.
        right     iterable of optional groups to the right of
                  the required arguments (innermost first).

    Returns a tuple of tuples, sorted by length, containing
    at most one candidate per distinct length.

    Algorithm: prefer left options over right options.  When two
    candidates have the same length, the one that uses fewer
    right groups is kept.

    If required is empty, left must also be empty (asserted).
    """
    required = tuple(required)

    if not required:
        assert not left

    accumulator = []
    counts = set()
    # Right groups vary in the outer loop, so for any given length
    # the candidate using the most left groups is seen first.
    for r in permute_right_option_groups(right):
        for l in permute_left_option_groups(left):
            t = l + required + r
            if len(t) in counts:
                continue
            counts.add(len(t))
            accumulator.append(t)

    accumulator.sort(key=len)
    return tuple(accumulator)
o + return self.render_function(clinic, function) + + def docstring_for_c_string(self, f): + text, add, output = _text_accumulator() + # turn docstring into a properly quoted C string + for line in f.docstring.split('\n'): + add('"') + add(quoted_for_c_string(line)) + add('\\n"\n') + + text.pop() + add('"') + return ''.join(text) + + def output_templates(self, f): + parameters = list(f.parameters.values()) + assert parameters + assert isinstance(parameters[0].converter, self_converter) + del parameters[0] + converters = [p.converter for p in parameters] + + has_option_groups = parameters and (parameters[0].group or parameters[-1].group) + default_return_converter = (not f.return_converter or + f.return_converter.type == 'PyObject *') + + positional = parameters and (parameters[-1].kind == inspect.Parameter.POSITIONAL_ONLY) + all_boring_objects = False # yes, this will be false if there are 0 parameters, it's fine + first_optional = len(parameters) + for i, p in enumerate(parameters): + c = p.converter + if type(c) != object_converter: + break + if c.format_unit != 'O': + break + if p.default is not unspecified: + first_optional = min(first_optional, i) + else: + all_boring_objects = True + + new_or_init = f.kind in (METHOD_NEW, METHOD_INIT) + + meth_o = (len(parameters) == 1 and + parameters[0].kind == inspect.Parameter.POSITIONAL_ONLY and + not converters[0].is_optional() and + isinstance(converters[0], object_converter) and + converters[0].format_unit == 'O' and + not new_or_init) + + # we have to set these things before we're done: + # + # docstring_prototype + # docstring_definition + # impl_prototype + # methoddef_define + # parser_prototype + # parser_definition + # impl_definition + # cpp_if + # cpp_endif + # methoddef_ifndef + + return_value_declaration = "PyObject *return_value = NULL;" + + methoddef_define = normalize_snippet(""" + #define {methoddef_name} \\ + {{"{name}", (PyCFunction){c_basename}, {methoddef_flags}, {c_basename}__doc__}}, + """) + if 
new_or_init and not f.docstring: + docstring_prototype = docstring_definition = '' + else: + docstring_prototype = normalize_snippet(""" + PyDoc_VAR({c_basename}__doc__); + """) + docstring_definition = normalize_snippet(""" + PyDoc_STRVAR({c_basename}__doc__, + {docstring}); + """) + impl_definition = normalize_snippet(""" + static {impl_return_type} + {c_basename}_impl({impl_parameters}) + """) + impl_prototype = parser_prototype = parser_definition = None + + parser_prototype_keyword = normalize_snippet(""" + static PyObject * + {c_basename}({self_type}{self_name}, PyObject *args, PyObject *kwargs) + """) + + parser_prototype_varargs = normalize_snippet(""" + static PyObject * + {c_basename}({self_type}{self_name}, PyObject *args) + """) + + # parser_body_fields remembers the fields passed in to the + # previous call to parser_body. this is used for an awful hack. + parser_body_fields = () + def parser_body(prototype, *fields): + nonlocal parser_body_fields + add, output = text_accumulator() + add(prototype) + parser_body_fields = fields + + fields = list(fields) + fields.insert(0, normalize_snippet(""" + {{ + {return_value_declaration} + {declarations} + {initializers} + """) + "\n") + # just imagine--your code is here in the middle + fields.append(normalize_snippet(""" + {modifications} + {return_value} = {c_basename}_impl({impl_arguments}); + {return_conversion} + + {exit_label} + {cleanup} + return return_value; + }} + """)) + for field in fields: + add('\n') + add(field) + return output() + + def insert_keywords(s): + return linear_format(s, declarations="static char *_keywords[] = {{{keywords}, NULL}};\n{declarations}") + + if not parameters: + # no parameters, METH_NOARGS + + flags = "METH_NOARGS" + + parser_prototype = normalize_snippet(""" + static PyObject * + {c_basename}({self_type}{self_name}, PyObject *Py_UNUSED(ignored)) + """) + parser_definition = parser_prototype + + if default_return_converter: + parser_definition = parser_prototype + '\n' + 
normalize_snippet(""" + {{ + return {c_basename}_impl({impl_arguments}); + }} + """) + else: + parser_definition = parser_body(parser_prototype) + + elif meth_o: + flags = "METH_O" + + meth_o_prototype = normalize_snippet(""" + static PyObject * + {c_basename}({impl_parameters}) + """) + + if default_return_converter: + # maps perfectly to METH_O, doesn't need a return converter. + # so we skip making a parse function + # and call directly into the impl function. + impl_prototype = parser_prototype = parser_definition = '' + impl_definition = meth_o_prototype + else: + # SLIGHT HACK + # use impl_parameters for the parser here! + parser_prototype = meth_o_prototype + parser_definition = parser_body(parser_prototype) + + elif has_option_groups: + # positional parameters with option groups + # (we have to generate lots of PyArg_ParseTuple calls + # in a big switch statement) + + flags = "METH_VARARGS" + parser_prototype = parser_prototype_varargs + + parser_definition = parser_body(parser_prototype, ' {option_group_parsing}') + + elif positional and all_boring_objects: + # positional-only, but no option groups, + # and nothing but normal objects: + # PyArg_UnpackTuple! 
+ + flags = "METH_VARARGS" + parser_prototype = parser_prototype_varargs + + parser_definition = parser_body(parser_prototype, normalize_snippet(""" + if (!PyArg_UnpackTuple(args, "{name}", + {unpack_min}, {unpack_max}, + {parse_arguments})) + goto exit; + """, indent=4)) + + elif positional: + # positional-only, but no option groups + # we only need one call to PyArg_ParseTuple + + flags = "METH_VARARGS" + parser_prototype = parser_prototype_varargs + + parser_definition = parser_body(parser_prototype, normalize_snippet(""" + if (!PyArg_ParseTuple(args, + "{format_units}:{name}", + {parse_arguments})) + goto exit; + """, indent=4)) + + else: + # positional-or-keyword arguments + flags = "METH_VARARGS|METH_KEYWORDS" + + parser_prototype = parser_prototype_keyword + + body = normalize_snippet(""" + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "{format_units}:{name}", _keywords, + {parse_arguments})) + goto exit; + """, indent=4) + parser_definition = parser_body(parser_prototype, normalize_snippet(""" + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "{format_units}:{name}", _keywords, + {parse_arguments})) + goto exit; + """, indent=4)) + parser_definition = insert_keywords(parser_definition) + + + if new_or_init: + methoddef_define = '' + + if f.kind == METHOD_NEW: + parser_prototype = parser_prototype_keyword + else: + return_value_declaration = "int return_value = -1;" + parser_prototype = normalize_snippet(""" + static int + {c_basename}({self_type}{self_name}, PyObject *args, PyObject *kwargs) + """) + + fields = list(parser_body_fields) + parses_positional = 'METH_NOARGS' not in flags + parses_keywords = 'METH_KEYWORDS' in flags + if parses_keywords: + assert parses_positional + + if not parses_keywords: + fields.insert(0, normalize_snippet(""" + if ({self_type_check}!_PyArg_NoKeywords("{name}", kwargs)) + goto exit; + """, indent=4)) + if not parses_positional: + fields.insert(0, normalize_snippet(""" + if 
({self_type_check}!_PyArg_NoPositional("{name}", args)) + goto exit; + """, indent=4)) + + parser_definition = parser_body(parser_prototype, *fields) + if parses_keywords: + parser_definition = insert_keywords(parser_definition) + + + if f.methoddef_flags: + flags += '|' + f.methoddef_flags + + methoddef_define = methoddef_define.replace('{methoddef_flags}', flags) + + methoddef_ifndef = '' + conditional = self.cpp.condition() + if not conditional: + cpp_if = cpp_endif = '' + else: + cpp_if = "#if " + conditional + cpp_endif = "#endif /* " + conditional + " */" + + if methoddef_define: + methoddef_ifndef = normalize_snippet(""" + #ifndef {methoddef_name} + #define {methoddef_name} + #endif /* !defined({methoddef_name}) */ + """) + + + # add ';' to the end of parser_prototype and impl_prototype + # (they mustn't be None, but they could be an empty string.) + assert parser_prototype is not None + if parser_prototype: + assert not parser_prototype.endswith(';') + parser_prototype += ';' + + if impl_prototype is None: + impl_prototype = impl_definition + if impl_prototype: + impl_prototype += ";" + + parser_definition = parser_definition.replace("{return_value_declaration}", return_value_declaration) + + d = { + "docstring_prototype" : docstring_prototype, + "docstring_definition" : docstring_definition, + "impl_prototype" : impl_prototype, + "methoddef_define" : methoddef_define, + "parser_prototype" : parser_prototype, + "parser_definition" : parser_definition, + "impl_definition" : impl_definition, + "cpp_if" : cpp_if, + "cpp_endif" : cpp_endif, + "methoddef_ifndef" : methoddef_ifndef, + } + + # make sure we didn't forget to assign something, + # and wrap each non-empty value in \n's + d2 = {} + for name, value in d.items(): + assert value is not None, "got a None value for template " + repr(name) + if value: + value = '\n' + value + '\n' + d2[name] = value + return d2 + + @staticmethod + def group_to_variable_name(group): + adjective = "left_" if group < 0 else 
"right_" + return "group_" + adjective + str(abs(group)) + + def render_option_group_parsing(self, f, template_dict): + # positional only, grouped, optional arguments! + # can be optional on the left or right. + # here's an example: + # + # [ [ [ A1 A2 ] B1 B2 B3 ] C1 C2 ] D1 D2 D3 [ E1 E2 E3 [ F1 F2 F3 ] ] + # + # Here group D are required, and all other groups are optional. + # (Group D's "group" is actually None.) + # We can figure out which sets of arguments we have based on + # how many arguments are in the tuple. + # + # Note that you need to count up on both sides. For example, + # you could have groups C+D, or C+D+E, or C+D+E+F. + # + # What if the number of arguments leads us to an ambiguous result? + # Clinic prefers groups on the left. So in the above example, + # five arguments would map to B+C, not C+D. + + add, output = text_accumulator() + parameters = list(f.parameters.values()) + if isinstance(parameters[0].converter, self_converter): + del parameters[0] + + groups = [] + group = None + left = [] + right = [] + required = [] + last = unspecified + + for p in parameters: + group_id = p.group + if group_id != last: + last = group_id + group = [] + if group_id < 0: + left.append(group) + elif group_id == 0: + group = required + else: + right.append(group) + group.append(p) + + count_min = sys.maxsize + count_max = -1 + + add("switch (PyTuple_GET_SIZE(args)) {{\n") + for subset in permute_optional_groups(left, required, right): + count = len(subset) + count_min = min(count_min, count) + count_max = max(count_max, count) + + if count == 0: + add(""" case 0: + break; +""") + continue + + group_ids = {p.group for p in subset} # eliminate duplicates + d = {} + d['count'] = count + d['name'] = f.name + d['groups'] = sorted(group_ids) + d['format_units'] = "".join(p.converter.format_unit for p in subset) + + parse_arguments = [] + for p in subset: + p.converter.parse_argument(parse_arguments) + d['parse_arguments'] = ", ".join(parse_arguments) + + 
group_ids.discard(0) + lines = [self.group_to_variable_name(g) + " = 1;" for g in group_ids] + lines = "\n".join(lines) + + s = """ + case {count}: + if (!PyArg_ParseTuple(args, "{format_units}:{name}", {parse_arguments})) + goto exit; + {group_booleans} + break; +"""[1:] + s = linear_format(s, group_booleans=lines) + s = s.format_map(d) + add(s) + + add(" default:\n") + s = ' PyErr_SetString(PyExc_TypeError, "{} requires {} to {} arguments");\n' + add(s.format(f.full_name, count_min, count_max)) + add(' goto exit;\n') + add("}}") + template_dict['option_group_parsing'] = output() + + def render_function(self, clinic, f): + if not f: + return "" + + add, output = text_accumulator() + data = CRenderData() + + assert f.parameters, "We should always have a 'self' at this point!" + parameters = f.render_parameters + converters = [p.converter for p in parameters] + + templates = self.output_templates(f) + + f_self = parameters[0] + selfless = parameters[1:] + assert isinstance(f_self.converter, self_converter), "No self parameter in " + repr(f.full_name) + "!" 
+ + last_group = 0 + first_optional = len(selfless) + positional = selfless and selfless[-1].kind == inspect.Parameter.POSITIONAL_ONLY + new_or_init = f.kind in (METHOD_NEW, METHOD_INIT) + default_return_converter = (not f.return_converter or + f.return_converter.type == 'PyObject *') + has_option_groups = False + + # offset i by -1 because first_optional needs to ignore self + for i, p in enumerate(parameters, -1): + c = p.converter + + if (i != -1) and (p.default is not unspecified): + first_optional = min(first_optional, i) + + # insert group variable + group = p.group + if last_group != group: + last_group = group + if group: + group_name = self.group_to_variable_name(group) + data.impl_arguments.append(group_name) + data.declarations.append("int " + group_name + " = 0;") + data.impl_parameters.append("int " + group_name) + has_option_groups = True + + c.render(p, data) + + if has_option_groups and (not positional): + fail("You cannot use optional groups ('[' and ']')\nunless all parameters are positional-only ('/').") + + # HACK + # when we're METH_O, but have a custom return converter, + # we use "impl_parameters" for the parsing function + # because that works better. but that means we must + # supress actually declaring the impl's parameters + # as variables in the parsing function. but since it's + # METH_O, we have exactly one anyway, so we know exactly + # where it is. 
+ if ("METH_O" in templates['methoddef_define'] and + not default_return_converter): + data.declarations.pop(0) + + template_dict = {} + + full_name = f.full_name + template_dict['full_name'] = full_name + + if new_or_init: + name = f.cls.name + else: + name = f.name + + template_dict['name'] = name + + if f.c_basename: + c_basename = f.c_basename + else: + fields = full_name.split(".") + if fields[-1] == '__new__': + fields.pop() + c_basename = "_".join(fields) + + template_dict['c_basename'] = c_basename + + methoddef_name = "{}_METHODDEF".format(c_basename.upper()) + template_dict['methoddef_name'] = methoddef_name + + template_dict['docstring'] = self.docstring_for_c_string(f) + + template_dict['self_name'] = template_dict['self_type'] = template_dict['self_type_check'] = '' + f_self.converter.set_template_dict(template_dict) + + f.return_converter.render(f, data) + template_dict['impl_return_type'] = f.return_converter.type + + template_dict['declarations'] = "\n".join(data.declarations) + template_dict['initializers'] = "\n\n".join(data.initializers) + template_dict['modifications'] = '\n\n'.join(data.modifications) + template_dict['keywords'] = '"' + '", "'.join(data.keywords) + '"' + template_dict['format_units'] = ''.join(data.format_units) + template_dict['parse_arguments'] = ', '.join(data.parse_arguments) + template_dict['impl_parameters'] = ", ".join(data.impl_parameters) + template_dict['impl_arguments'] = ", ".join(data.impl_arguments) + template_dict['return_conversion'] = "".join(data.return_conversion).rstrip() + template_dict['cleanup'] = "".join(data.cleanup) + template_dict['return_value'] = data.return_value + + # used by unpack tuple code generator + ignore_self = -1 if isinstance(converters[0], self_converter) else 0 + unpack_min = first_optional + unpack_max = len(selfless) + template_dict['unpack_min'] = str(unpack_min) + template_dict['unpack_max'] = str(unpack_max) + + if has_option_groups: + self.render_option_group_parsing(f, 
template_dict) + + for name, destination in clinic.field_destinations.items(): + template = templates[name] + if has_option_groups: + template = linear_format(template, + option_group_parsing=template_dict['option_group_parsing']) + template = linear_format(template, + declarations=template_dict['declarations'], + return_conversion=template_dict['return_conversion'], + initializers=template_dict['initializers'], + modifications=template_dict['modifications'], + cleanup=template_dict['cleanup'], + ) + + # Only generate the "exit:" label + # if we have any gotos + need_exit_label = "goto exit;" in template + template = linear_format(template, + exit_label="exit:" if need_exit_label else '' + ) + + s = template.format_map(template_dict) + + if clinic.line_prefix: + s = indent_all_lines(s, clinic.line_prefix) + if clinic.line_suffix: + s = suffix_all_lines(s, clinic.line_suffix) + + destination.append(s) + + return clinic.get_destination('block').dump() + + + + +@contextlib.contextmanager +def OverrideStdioWith(stdout): + saved_stdout = sys.stdout + sys.stdout = stdout + try: + yield + finally: + assert sys.stdout is stdout + sys.stdout = saved_stdout + + +def create_regex(before, after, word=True, whole_line=True): + """Create an re object for matching marker lines.""" + group_re = "\w+" if word else ".+" + pattern = r'{}({}){}' + if whole_line: + pattern = '^' + pattern + '$' + pattern = pattern.format(re.escape(before), group_re, re.escape(after)) + return re.compile(pattern) + + +class Block: + r""" + Represents a single block of text embedded in + another file. If dsl_name is None, the block represents + verbatim text, raw original text from the file, in + which case "input" will be the only non-false member. + If dsl_name is not None, the block represents a Clinic + block. + + input is always str, with embedded \n characters. 
+ input represents the original text from the file; + if it's a Clinic block, it is the original text with + the body_prefix and redundant leading whitespace removed. + + dsl_name is either str or None. If str, it's the text + found on the start line of the block between the square + brackets. + + signatures is either list or None. If it's a list, + it may only contain clinic.Module, clinic.Class, and + clinic.Function objects. At the moment it should + contain at most one of each. + + output is either str or None. If str, it's the output + from this block, with embedded '\n' characters. + + indent is either str or None. It's the leading whitespace + that was found on every line of input. (If body_prefix is + not empty, this is the indent *after* removing the + body_prefix.) + + preindent is either str or None. It's the whitespace that + was found in front of every line of input *before* the + "body_prefix" (see the Language object). If body_prefix + is empty, preindent must always be empty too. + + To illustrate indent and preindent: Assume that '_' + represents whitespace. If the block processed was in a + Python file, and looked like this: + ____#/*[python] + ____#__for a in range(20): + ____#____print(a) + ____#[python]*/ + "preindent" would be "____" and "indent" would be "__". + + """ + def __init__(self, input, dsl_name=None, signatures=None, output=None, indent='', preindent=''): + assert isinstance(input, str) + self.input = input + self.dsl_name = dsl_name + self.signatures = signatures or [] + self.output = output + self.indent = indent + self.preindent = preindent + + def __repr__(self): + dsl_name = self.dsl_name or "text" + def summarize(s): + s = repr(s) + if len(s) > 30: + return s[:26] + "..." + s[0] + return s + return "".join(( + "<Block ", dsl_name, " input=", summarize(self.input), " output=", summarize(self.output), ">")) + + +class BlockParser: + """ + Block-oriented parser for Argument Clinic. + Iterator, yields Block objects. 
+ """ + + def __init__(self, input, language, *, verify=True): + """ + "input" should be a str object + with embedded \n characters. + + "language" should be a Language object. + """ + language.validate() + + self.input = collections.deque(reversed(input.splitlines(keepends=True))) + self.block_start_line_number = self.line_number = 0 + + self.language = language + before, _, after = language.start_line.partition('{dsl_name}') + assert _ == '{dsl_name}' + self.find_start_re = create_regex(before, after, whole_line=False) + self.start_re = create_regex(before, after) + self.verify = verify + self.last_checksum_re = None + self.last_dsl_name = None + self.dsl_name = None + self.first_block = True + + def __iter__(self): + return self + + def __next__(self): + while True: + if not self.input: + raise StopIteration + + if self.dsl_name: + return_value = self.parse_clinic_block(self.dsl_name) + self.dsl_name = None + self.first_block = False + return return_value + block = self.parse_verbatim_block() + if self.first_block and not block.input: + continue + self.first_block = False + return block + + + def is_start_line(self, line): + match = self.start_re.match(line.lstrip()) + return match.group(1) if match else None + + def _line(self): + self.line_number += 1 + line = self.input.pop() + self.language.parse_line(line) + return line + + def parse_verbatim_block(self): + add, output = text_accumulator() + self.block_start_line_number = self.line_number + + while self.input: + line = self._line() + dsl_name = self.is_start_line(line) + if dsl_name: + self.dsl_name = dsl_name + break + add(line) + + return Block(output()) + + def parse_clinic_block(self, dsl_name): + input_add, input_output = text_accumulator() + self.block_start_line_number = self.line_number + 1 + stop_line = self.language.stop_line.format(dsl_name=dsl_name) + body_prefix = self.language.body_prefix.format(dsl_name=dsl_name) + + def is_stop_line(line): + # make sure to recognize stop line even if it + # 
doesn't end with EOL (it could be the very end of the file) + if not line.startswith(stop_line): + return False + remainder = line[len(stop_line):] + return (not remainder) or remainder.isspace() + + # consume body of program + while self.input: + line = self._line() + if is_stop_line(line) or self.is_start_line(line): + break + if body_prefix: + line = line.lstrip() + assert line.startswith(body_prefix) + line = line[len(body_prefix):] + input_add(line) + + # consume output and checksum line, if present. + if self.last_dsl_name == dsl_name: + checksum_re = self.last_checksum_re + else: + before, _, after = self.language.checksum_line.format(dsl_name=dsl_name, arguments='{arguments}').partition('{arguments}') + assert _ == '{arguments}' + checksum_re = create_regex(before, after, word=False) + self.last_dsl_name = dsl_name + self.last_checksum_re = checksum_re + + # scan forward for checksum line + output_add, output_output = text_accumulator() + arguments = None + while self.input: + line = self._line() + match = checksum_re.match(line.lstrip()) + arguments = match.group(1) if match else None + if arguments: + break + output_add(line) + if self.is_start_line(line): + break + + output = output_output() + if arguments: + d = {} + for field in shlex.split(arguments): + name, equals, value = field.partition('=') + if not equals: + fail("Mangled Argument Clinic marker line: {!r}".format(line)) + d[name.strip()] = value.strip() + + if self.verify: + if 'input' in d: + checksum = d['output'] + input_checksum = d['input'] + else: + checksum = d['checksum'] + input_checksum = None + + computed = compute_checksum(output, len(checksum)) + if checksum != computed: + fail("Checksum mismatch!\nExpected: {}\nComputed: {}\n" + "Suggested fix: remove all generated code including " + "the end marker,\n" + "or use the '-f' option." 
+ .format(checksum, computed)) + else: + # put back output + output_lines = output.splitlines(keepends=True) + self.line_number -= len(output_lines) + self.input.extend(reversed(output_lines)) + output = None + + return Block(input_output(), dsl_name, output=output) + + +class BlockPrinter: + + def __init__(self, language, f=None): + self.language = language + self.f = f or io.StringIO() + + def print_block(self, block): + input = block.input + output = block.output + dsl_name = block.dsl_name + write = self.f.write + + assert not ((dsl_name == None) ^ (output == None)), "you must specify dsl_name and output together, dsl_name " + repr(dsl_name) + + if not dsl_name: + write(input) + return + + write(self.language.start_line.format(dsl_name=dsl_name)) + write("\n") + + body_prefix = self.language.body_prefix.format(dsl_name=dsl_name) + if not body_prefix: + write(input) + else: + for line in input.split('\n'): + write(body_prefix) + write(line) + write("\n") + + write(self.language.stop_line.format(dsl_name=dsl_name)) + write("\n") + + input = ''.join(block.input) + output = ''.join(block.output) + if output: + if not output.endswith('\n'): + output += '\n' + write(output) + + arguments="output={} input={}".format(compute_checksum(output, 16), compute_checksum(input, 16)) + write(self.language.checksum_line.format(dsl_name=dsl_name, arguments=arguments)) + write("\n") + + def write(self, text): + self.f.write(text) + + +class Destination: + def __init__(self, name, type, clinic, *args): + self.name = name + self.type = type + self.clinic = clinic + valid_types = ('buffer', 'file', 'suppress', 'two-pass') + if type not in valid_types: + fail("Invalid destination type " + repr(type) + " for " + name + " , must be " + ', '.join(valid_types)) + extra_arguments = 1 if type == "file" else 0 + if len(args) < extra_arguments: + fail("Not enough arguments for destination " + name + " new " + type) + if len(args) > extra_arguments: + fail("Too many arguments for destination " 
+ name + " new " + type) + if type =='file': + d = {} + filename = clinic.filename + d['path'] = filename + dirname, basename = os.path.split(filename) + if not dirname: + dirname = '.' + d['dirname'] = dirname + d['basename'] = basename + d['basename_root'], d['basename_extension'] = os.path.splitext(filename) + self.filename = args[0].format_map(d) + if type == 'two-pass': + self.id = None + + self.text, self.append, self._dump = _text_accumulator() + + def __repr__(self): + if self.type == 'file': + file_repr = " " + repr(self.filename) + else: + file_repr = '' + return "".join(("<Destination ", self.name, " ", self.type, file_repr, ">")) + + def clear(self): + if self.type != 'buffer': + fail("Can't clear destination" + self.name + " , it's not of type buffer") + self.text.clear() + + def dump(self): + if self.type == 'two-pass': + if self.id is None: + self.id = str(uuid.uuid4()) + return self.id + fail("You can only dump a two-pass buffer exactly once!") + return self._dump() + + +# maps strings to Language objects. +# "languages" maps the name of the language ("C", "Python"). +# "extensions" maps the file extension ("c", "py"). +languages = { 'C': CLanguage, 'Python': PythonLanguage } +extensions = { name: CLanguage for name in "c cc cpp cxx h hh hpp hxx".split() } +extensions['py'] = PythonLanguage + + +# maps strings to callables. +# these callables must be of the form: +# def foo(name, default, *, ...) +# The callable may have any number of keyword-only parameters. +# The callable must return a CConverter object. +# The callable should not call builtins.print. +converters = {} + +# maps strings to callables. +# these callables follow the same rules as those for "converters" above. +# note however that they will never be called with keyword-only parameters. +legacy_converters = {} + + +# maps strings to callables. +# these callables must be of the form: +# def foo(*, ...) +# The callable may have any number of keyword-only parameters. 
+# The callable must return a CConverter object. +# The callable should not call builtins.print. +return_converters = {} + +clinic = None +class Clinic: + + presets_text = """ +preset block +everything block +docstring_prototype suppress +parser_prototype suppress +cpp_if suppress +cpp_endif suppress +methoddef_ifndef buffer + +preset original +everything block +docstring_prototype suppress +parser_prototype suppress +cpp_if suppress +cpp_endif suppress +methoddef_ifndef buffer + +preset file +everything file +docstring_prototype suppress +parser_prototype suppress +impl_definition block + +preset buffer +everything buffer +docstring_prototype suppress +impl_prototype suppress +parser_prototype suppress +impl_definition block + +preset partial-buffer +everything buffer +docstring_prototype block +impl_prototype suppress +methoddef_define block +parser_prototype block +impl_definition block + +preset two-pass +everything buffer +docstring_prototype two-pass +impl_prototype suppress +methoddef_define two-pass +parser_prototype two-pass +impl_definition block + +""" + + def __init__(self, language, printer=None, *, force=False, verify=True, filename=None): + # maps strings to Parser objects. + # (instantiated from the "parsers" global.) 
+ self.parsers = {} + self.language = language + if printer: + fail("Custom printers are broken right now") + self.printer = printer or BlockPrinter(language) + self.verify = verify + self.force = force + self.filename = filename + self.modules = collections.OrderedDict() + self.classes = collections.OrderedDict() + self.functions = [] + + self.line_prefix = self.line_suffix = '' + + self.destinations = {} + self.add_destination("block", "buffer") + self.add_destination("suppress", "suppress") + self.add_destination("buffer", "buffer") + self.add_destination("two-pass", "two-pass") + if filename: + self.add_destination("file", "file", "{dirname}/clinic/{basename}.h") + + d = self.destinations.get + self.field_destinations = collections.OrderedDict(( + ('cpp_if', d('suppress')), + ('docstring_prototype', d('suppress')), + ('docstring_definition', d('block')), + ('methoddef_define', d('block')), + ('impl_prototype', d('block')), + ('parser_prototype', d('suppress')), + ('parser_definition', d('block')), + ('cpp_endif', d('suppress')), + ('methoddef_ifndef', d('buffer')), + ('impl_definition', d('block')), + )) + + self.field_destinations_stack = [] + + self.presets = {} + preset = None + for line in self.presets_text.strip().split('\n'): + line = line.strip() + if not line: + continue + name, value = line.split() + if name == 'preset': + self.presets[value] = preset = collections.OrderedDict() + continue + + destination = self.get_destination(value) + + if name == 'everything': + for name in self.field_destinations: + preset[name] = destination + continue + + assert name in self.field_destinations + preset[name] = destination + + global clinic + clinic = self + + def get_destination(self, name, default=unspecified): + d = self.destinations.get(name) + if not d: + if default is not unspecified: + return default + fail("Destination does not exist: " + repr(name)) + return d + + def add_destination(self, name, type, *args): + if name in self.destinations: + 
fail("Destination already exists: " + repr(name)) + self.destinations[name] = Destination(name, type, self, *args) + + def parse(self, input): + printer = self.printer + self.block_parser = BlockParser(input, self.language, verify=self.verify) + for block in self.block_parser: + dsl_name = block.dsl_name + if dsl_name: + if dsl_name not in self.parsers: + assert dsl_name in parsers, "No parser to handle {!r} block.".format(dsl_name) + self.parsers[dsl_name] = parsers[dsl_name](self) + parser = self.parsers[dsl_name] + try: + parser.parse(block) + except Exception: + fail('Exception raised during parsing:\n' + + traceback.format_exc().rstrip()) + printer.print_block(block) + + second_pass_replacements = {} + + for name, destination in self.destinations.items(): + if destination.type == 'suppress': + continue + output = destination._dump() + + if destination.type == 'two-pass': + if destination.id: + second_pass_replacements[destination.id] = output + elif output: + fail("Two-pass buffer " + repr(name) + " not empty at end of file!") + continue + + if output: + + block = Block("", dsl_name="clinic", output=output) + + if destination.type == 'buffer': + block.input = "dump " + name + "\n" + warn("Destination buffer " + repr(name) + " not empty at end of file, emptying.") + printer.write("\n") + printer.print_block(block) + continue + + if destination.type == 'file': + try: + dirname = os.path.dirname(destination.filename) + try: + os.makedirs(dirname) + except FileExistsError: + if not os.path.isdir(dirname): + fail("Can't write to destination {}, " + "can't make directory {}!".format( + destination.filename, dirname)) + if self.verify: + with open(destination.filename, "rt") as f: + parser_2 = BlockParser(f.read(), language=self.language) + blocks = list(parser_2) + if (len(blocks) != 1) or (blocks[0].input != 'preserve\n'): + fail("Modified destination file " + repr(destination.filename) + ", not overwriting!") + except FileNotFoundError: + pass + + block.input = 
'preserve\n' + printer_2 = BlockPrinter(self.language) + printer_2.print_block(block) + with open(destination.filename, "wt") as f: + f.write(printer_2.f.getvalue()) + continue + text = printer.f.getvalue() + + if second_pass_replacements: + printer_2 = BlockPrinter(self.language) + parser_2 = BlockParser(text, self.language) + changed = False + for block in parser_2: + if block.dsl_name: + for id, replacement in second_pass_replacements.items(): + if id in block.output: + changed = True + block.output = block.output.replace(id, replacement) + printer_2.print_block(block) + if changed: + text = printer_2.f.getvalue() + + return text + + + def _module_and_class(self, fields): + """ + fields should be an iterable of field names. + returns a tuple of (module, class). + the module object could actually be self (a clinic object). + this function is only ever used to find the parent of where + a new class/module should go. + """ + in_classes = False + parent = module = self + cls = None + so_far = [] + + for field in fields: + so_far.append(field) + if not in_classes: + child = parent.modules.get(field) + if child: + parent = module = child + continue + in_classes = True + if not hasattr(parent, 'classes'): + return module, cls + child = parent.classes.get(field) + if not child: + fail('Parent class or module ' + '.'.join(so_far) + " does not exist.") + cls = parent = child + + return module, cls + + +def parse_file(filename, *, force=False, verify=True, output=None, encoding='utf-8'): + extension = os.path.splitext(filename)[1][1:] + if not extension: + fail("Can't extract file type for file " + repr(filename)) + + try: + language = extensions[extension](filename) + except KeyError: + fail("Can't identify file type for file " + repr(filename)) + + with open(filename, 'r', encoding=encoding) as f: + raw = f.read() + + # exit quickly if there are no clinic markers in the file + find_start_re = BlockParser("", language).find_start_re + if not find_start_re.search(raw): + 
return + + clinic = Clinic(language, force=force, verify=verify, filename=filename) + cooked = clinic.parse(raw) + if (cooked == raw) and not force: + return + + directory = os.path.dirname(filename) or '.' + + with tempfile.TemporaryDirectory(prefix="clinic", dir=directory) as tmpdir: + bytes = cooked.encode(encoding) + tmpfilename = os.path.join(tmpdir, os.path.basename(filename)) + with open(tmpfilename, "wb") as f: + f.write(bytes) + os.replace(tmpfilename, output or filename) + + +def compute_checksum(input, length=None): + input = input or '' + s = hashlib.sha1(input.encode('utf-8')).hexdigest() + if length: + s = s[:length] + return s + + + + +class PythonParser: + def __init__(self, clinic): + pass + + def parse(self, block): + s = io.StringIO() + with OverrideStdioWith(s): + exec(block.input) + block.output = s.getvalue() + + +class Module: + def __init__(self, name, module=None): + self.name = name + self.module = self.parent = module + + self.modules = collections.OrderedDict() + self.classes = collections.OrderedDict() + self.functions = [] + + def __repr__(self): + return "<clinic.Module " + repr(self.name) + " at " + str(id(self)) + ">" + +class Class: + def __init__(self, name, module=None, cls=None, typedef=None, type_object=None): + self.name = name + self.module = module + self.cls = cls + self.typedef = typedef + self.type_object = type_object + self.parent = cls or module + + self.classes = collections.OrderedDict() + self.functions = [] + + def __repr__(self): + return "<clinic.Class " + repr(self.name) + " at " + str(id(self)) + ">" + +unsupported_special_methods = set(""" + +__abs__ +__add__ +__and__ +__bytes__ +__call__ +__complex__ +__delitem__ +__divmod__ +__eq__ +__float__ +__floordiv__ +__ge__ +__getattr__ +__getattribute__ +__getitem__ +__gt__ +__hash__ +__iadd__ +__iand__ +__idivmod__ +__ifloordiv__ +__ilshift__ +__imod__ +__imul__ +__index__ +__int__ +__invert__ +__ior__ +__ipow__ +__irshift__ +__isub__ +__iter__ +__itruediv__ 
+__ixor__ +__le__ +__len__ +__lshift__ +__lt__ +__mod__ +__mul__ +__neg__ +__new__ +__next__ +__or__ +__pos__ +__pow__ +__radd__ +__rand__ +__rdivmod__ +__repr__ +__rfloordiv__ +__rlshift__ +__rmod__ +__rmul__ +__ror__ +__round__ +__rpow__ +__rrshift__ +__rshift__ +__rsub__ +__rtruediv__ +__rxor__ +__setattr__ +__setitem__ +__str__ +__sub__ +__truediv__ +__xor__ + +""".strip().split()) + + +INVALID, CALLABLE, STATIC_METHOD, CLASS_METHOD, METHOD_INIT, METHOD_NEW = """ +INVALID, CALLABLE, STATIC_METHOD, CLASS_METHOD, METHOD_INIT, METHOD_NEW +""".replace(",", "").strip().split() + +class Function: + """ + Mutable duck type for inspect.Function. + + docstring - a str containing + * embedded line breaks + * text outdented to the left margin + * no trailing whitespace. + It will always be true that + (not docstring) or ((not docstring[0].isspace()) and (docstring.rstrip() == docstring)) + """ + + def __init__(self, parameters=None, *, name, + module, cls=None, c_basename=None, + full_name=None, + return_converter, return_annotation=_empty, + docstring=None, kind=CALLABLE, coexist=False, + docstring_only=False): + self.parameters = parameters or collections.OrderedDict() + self.return_annotation = return_annotation + self.name = name + self.full_name = full_name + self.module = module + self.cls = cls + self.parent = cls or module + self.c_basename = c_basename + self.return_converter = return_converter + self.docstring = docstring or '' + self.kind = kind + self.coexist = coexist + self.self_converter = None + # docstring_only means "don't generate a machine-readable + # signature, just a normal docstring". it's True for + # functions with optional groups because we can't represent + # those accurately with inspect.Signature in 3.4. 
+ self.docstring_only = docstring_only + + self.rendered_parameters = None + + __render_parameters__ = None + @property + def render_parameters(self): + if not self.__render_parameters__: + self.__render_parameters__ = l = [] + for p in self.parameters.values(): + p = p.copy() + p.converter.pre_render() + l.append(p) + return self.__render_parameters__ + + @property + def methoddef_flags(self): + if self.kind in (METHOD_INIT, METHOD_NEW): + return None + flags = [] + if self.kind == CLASS_METHOD: + flags.append('METH_CLASS') + elif self.kind == STATIC_METHOD: + flags.append('METH_STATIC') + else: + assert self.kind == CALLABLE, "unknown kind: " + repr(self.kind) + if self.coexist: + flags.append('METH_COEXIST') + return '|'.join(flags) + + def __repr__(self): + return '<clinic.Function ' + self.name + '>' + + def copy(self, **overrides): + kwargs = { + 'name': self.name, 'module': self.module, 'parameters': self.parameters, + 'cls': self.cls, 'c_basename': self.c_basename, + 'full_name': self.full_name, + 'return_converter': self.return_converter, 'return_annotation': self.return_annotation, + 'docstring': self.docstring, 'kind': self.kind, 'coexist': self.coexist, + 'docstring_only': self.docstring_only, + } + kwargs.update(overrides) + f = Function(**kwargs) + + parameters = collections.OrderedDict() + for name, value in f.parameters.items(): + value = value.copy(function=f) + parameters[name] = value + f.parameters = parameters + return f + + +class Parameter: + """ + Mutable duck type of inspect.Parameter. 
+ """ + + def __init__(self, name, kind, *, default=_empty, + function, converter, annotation=_empty, + docstring=None, group=0): + self.name = name + self.kind = kind + self.default = default + self.function = function + self.converter = converter + self.annotation = annotation + self.docstring = docstring or '' + self.group = group + + def __repr__(self): + return '<clinic.Parameter ' + self.name + '>' + + def is_keyword_only(self): + return self.kind == inspect.Parameter.KEYWORD_ONLY + + def is_positional_only(self): + return self.kind == inspect.Parameter.POSITIONAL_ONLY + + def copy(self, **overrides): + kwargs = { + 'name': self.name, 'kind': self.kind, 'default':self.default, + 'function': self.function, 'converter': self.converter, 'annotation': self.annotation, + 'docstring': self.docstring, 'group': self.group, + } + kwargs.update(overrides) + if 'converter' not in overrides: + converter = copy.copy(self.converter) + converter.function = kwargs['function'] + kwargs['converter'] = converter + return Parameter(**kwargs) + + + +class LandMine: + # try to access any + def __init__(self, message): + self.__message__ = message + + def __repr__(self): + return '<LandMine ' + repr(self.__message__) + ">" + + def __getattribute__(self, name): + if name in ('__repr__', '__message__'): + return super().__getattribute__(name) + # raise RuntimeError(repr(name)) + fail("Stepped on a land mine, trying to access attribute " + repr(name) + ":\n" + self.__message__) + + +def add_c_converter(f, name=None): + if not name: + name = f.__name__ + if not name.endswith('_converter'): + return f + name = name[:-len('_converter')] + converters[name] = f + return f + +def add_default_legacy_c_converter(cls): + # automatically add converter for default format unit + # (but without stomping on the existing one if it's already + # set, in case you subclass) + if ((cls.format_unit != 'O&') and + (cls.format_unit not in legacy_converters)): + legacy_converters[cls.format_unit] = cls + if 
cls.format_unit: + legacy_converters[cls.format_unit] = cls + return cls + +def add_legacy_c_converter(format_unit, **kwargs): + """ + Adds a legacy converter. + """ + def closure(f): + if not kwargs: + added_f = f + else: + added_f = functools.partial(f, **kwargs) + if format_unit: + legacy_converters[format_unit] = added_f + return f + return closure + +class CConverterAutoRegister(type): + def __init__(cls, name, bases, classdict): + add_c_converter(cls) + add_default_legacy_c_converter(cls) + +class CConverter(metaclass=CConverterAutoRegister): + """ + For the init function, self, name, function, and default + must be keyword-or-positional parameters. All other + parameters must be keyword-only. + """ + + # The C name to use for this variable. + name = None + + # The Python name to use for this variable. + py_name = None + + # The C type to use for this variable. + # 'type' should be a Python string specifying the type, e.g. "int". + # If this is a pointer type, the type string should end with ' *'. + type = None + + # The Python default value for this parameter, as a Python value. + # Or the magic value "unspecified" if there is no default. + # Or the magic value "unknown" if this value is a cannot be evaluated + # at Argument-Clinic-preprocessing time (but is presumed to be valid + # at runtime). + default = unspecified + + # If not None, default must be isinstance() of this type. + # (You can also specify a tuple of types.) + default_type = None + + # "default" converted into a C value, as a string. + # Or None if there is no default. + c_default = None + + # "default" converted into a Python value, as a string. + # Or None if there is no default. + py_default = None + + # The default value used to initialize the C variable when + # there is no default, but not specifying a default may + # result in an "uninitialized variable" warning. 
This can + # easily happen when using option groups--although + # properly-written code won't actually use the variable, + # the variable does get passed in to the _impl. (Ah, if + # only dataflow analysis could inline the static function!) + # + # This value is specified as a string. + # Every non-abstract subclass should supply a valid value. + c_ignored_default = 'NULL' + + # The C converter *function* to be used, if any. + # (If this is not None, format_unit must be 'O&'.) + converter = None + + # Should Argument Clinic add a '&' before the name of + # the variable when passing it into the _impl function? + impl_by_reference = False + + # Should Argument Clinic add a '&' before the name of + # the variable when passing it into PyArg_ParseTuple (AndKeywords)? + parse_by_reference = True + + ############################################################# + ############################################################# + ## You shouldn't need to read anything below this point to ## + ## write your own converter functions. ## + ############################################################# + ############################################################# + + # The "format unit" to specify for this variable when + # parsing arguments using PyArg_ParseTuple (AndKeywords). + # Custom converters should always use the default value of 'O&'. + format_unit = 'O&' + + # What encoding do we want for this variable? Only used + # by format units starting with 'e'. + encoding = None + + # Should this object be required to be a subclass of a specific type? + # If not None, should be a string representing a pointer to a + # PyTypeObject (e.g. "&PyUnicode_Type"). + # Only used by the 'O!' format unit (and the "object" converter). + subclass_of = None + + # Do we want an adjacent '_length' variable for this variable? + # Only used by format units ending with '#'. + length = False + + # Should we show this parameter in the generated + # __text_signature__? This is *almost* always True. 
+ # (It's only False for __new__, __init__, and METH_STATIC functions.) + show_in_signature = True + + # Overrides the name used in a text signature. + # The name used for a "self" parameter must be one of + # self, type, or module; however users can set their own. + # This lets the self_converter overrule the user-settable + # name, *just* for the text signature. + # Only set by self_converter. + signature_name = None + + # keep in sync with self_converter.__init__! + def __init__(self, name, py_name, function, default=unspecified, *, c_default=None, py_default=None, annotation=unspecified, **kwargs): + self.name = name + self.py_name = py_name + + if default is not unspecified: + if self.default_type and not isinstance(default, (self.default_type, Unknown)): + if isinstance(self.default_type, type): + types_str = self.default_type.__name__ + else: + types_str = ', '.join((cls.__name__ for cls in self.default_type)) + fail("{}: default value {!r} for field {} is not of type {}".format( + self.__class__.__name__, default, name, types_str)) + self.default = default + + if c_default: + self.c_default = c_default + if py_default: + self.py_default = py_default + + if annotation != unspecified: + fail("The 'annotation' parameter is not currently permitted.") + + # this is deliberate, to prevent you from caching information + # about the function in the init. + # (that breaks if we get cloned.) + # so after this change we will noisily fail. 
+ self.function = LandMine("Don't access members of self.function inside converter_init!") + self.converter_init(**kwargs) + self.function = function + + def converter_init(self): + pass + + def is_optional(self): + return (self.default is not unspecified) + + def _render_self(self, parameter, data): + self.parameter = parameter + original_name = self.name + name = ensure_legal_c_identifier(original_name) + + # impl_arguments + s = ("&" if self.impl_by_reference else "") + name + data.impl_arguments.append(s) + if self.length: + data.impl_arguments.append(self.length_name()) + + # impl_parameters + data.impl_parameters.append(self.simple_declaration(by_reference=self.impl_by_reference)) + if self.length: + data.impl_parameters.append("Py_ssize_clean_t " + self.length_name()) + + def _render_non_self(self, parameter, data): + self.parameter = parameter + original_name = self.name + name = ensure_legal_c_identifier(original_name) + + # declarations + d = self.declaration() + data.declarations.append(d) + + # initializers + initializers = self.initialize() + if initializers: + data.initializers.append('/* initializers for ' + name + ' */\n' + initializers.rstrip()) + + # modifications + modifications = self.modify() + if modifications: + data.modifications.append('/* modifications for ' + name + ' */\n' + modifications.rstrip()) + + # keywords + data.keywords.append(parameter.name) + + # format_units + if self.is_optional() and '|' not in data.format_units: + data.format_units.append('|') + if parameter.is_keyword_only() and '$' not in data.format_units: + data.format_units.append('$') + data.format_units.append(self.format_unit) + + # parse_arguments + self.parse_argument(data.parse_arguments) + + # cleanup + cleanup = self.cleanup() + if cleanup: + data.cleanup.append('/* Cleanup for ' + name + ' */\n' + cleanup.rstrip() + "\n") + + def render(self, parameter, data): + """ + parameter is a clinic.Parameter instance. + data is a CRenderData instance. 
+ """ + self._render_self(parameter, data) + self._render_non_self(parameter, data) + + def length_name(self): + """Computes the name of the associated "length" variable.""" + if not self.length: + return None + return ensure_legal_c_identifier(self.name) + "_length" + + # Why is this one broken out separately? + # For "positional-only" function parsing, + # which generates a bunch of PyArg_ParseTuple calls. + def parse_argument(self, list): + assert not (self.converter and self.encoding) + if self.format_unit == 'O&': + assert self.converter + list.append(self.converter) + + if self.encoding: + list.append(c_repr(self.encoding)) + elif self.subclass_of: + list.append(self.subclass_of) + + legal_name = ensure_legal_c_identifier(self.name) + s = ("&" if self.parse_by_reference else "") + legal_name + list.append(s) + + if self.length: + list.append("&" + self.length_name()) + + # + # All the functions after here are intended as extension points. + # + + def simple_declaration(self, by_reference=False): + """ + Computes the basic declaration of the variable. + Used in computing the prototype declaration and the + variable declaration. + """ + prototype = [self.type] + if by_reference or not self.type.endswith('*'): + prototype.append(" ") + if by_reference: + prototype.append('*') + prototype.append(ensure_legal_c_identifier(self.name)) + return "".join(prototype) + + def declaration(self): + """ + The C statement to declare this variable. 
+ """ + declaration = [self.simple_declaration()] + default = self.c_default + if not default and self.parameter.group: + default = self.c_ignored_default + if default: + declaration.append(" = ") + declaration.append(default) + declaration.append(";") + if self.length: + declaration.append('\nPy_ssize_clean_t ') + declaration.append(self.length_name()) + declaration.append(';') + s = "".join(declaration) + # double up curly-braces, this string will be used + # as part of a format_map() template later + s = s.replace("{", "{{") + s = s.replace("}", "}}") + return s + + def initialize(self): + """ + The C statements required to set up this variable before parsing. + Returns a string containing this code indented at column 0. + If no initialization is necessary, returns an empty string. + """ + return "" + + def modify(self): + """ + The C statements required to modify this variable after parsing. + Returns a string containing this code indented at column 0. + If no initialization is necessary, returns an empty string. + """ + return "" + + def cleanup(self): + """ + The C statements required to clean up after this variable. + Returns a string containing this code indented at column 0. + If no cleanup is necessary, returns an empty string. + """ + return "" + + def pre_render(self): + """ + A second initialization function, like converter_init, + called just before rendering. + You are permitted to examine self.function here. 
+ """ + pass + + +class bool_converter(CConverter): + type = 'int' + default_type = bool + format_unit = 'p' + c_ignored_default = '0' + + def converter_init(self): + if self.default is not unspecified: + self.default = bool(self.default) + self.c_default = str(int(self.default)) + +class char_converter(CConverter): + type = 'char' + default_type = str + format_unit = 'c' + c_ignored_default = "'\0'" + + def converter_init(self): + if isinstance(self.default, str) and (len(self.default) != 1): + fail("char_converter: illegal default value " + repr(self.default)) + + +@add_legacy_c_converter('B', bitwise=True) +class unsigned_char_converter(CConverter): + type = 'unsigned char' + default_type = int + format_unit = 'b' + c_ignored_default = "'\0'" + + def converter_init(self, *, bitwise=False): + if bitwise: + self.format_unit = 'B' + +class byte_converter(unsigned_char_converter): pass + +class short_converter(CConverter): + type = 'short' + default_type = int + format_unit = 'h' + c_ignored_default = "0" + +class unsigned_short_converter(CConverter): + type = 'unsigned short' + default_type = int + format_unit = 'H' + c_ignored_default = "0" + + def converter_init(self, *, bitwise=False): + if not bitwise: + fail("Unsigned shorts must be bitwise (for now).") + +@add_legacy_c_converter('C', types='str') +class int_converter(CConverter): + type = 'int' + default_type = int + format_unit = 'i' + c_ignored_default = "0" + + def converter_init(self, *, types='int'): + if types == 'str': + self.format_unit = 'C' + elif types != 'int': + fail("int_converter: illegal 'types' argument") + +class unsigned_int_converter(CConverter): + type = 'unsigned int' + default_type = int + format_unit = 'I' + c_ignored_default = "0" + + def converter_init(self, *, bitwise=False): + if not bitwise: + fail("Unsigned ints must be bitwise (for now).") + +class long_converter(CConverter): + type = 'long' + default_type = int + format_unit = 'l' + c_ignored_default = "0" + +class 
unsigned_long_converter(CConverter): + type = 'unsigned long' + default_type = int + format_unit = 'k' + c_ignored_default = "0" + + def converter_init(self, *, bitwise=False): + if not bitwise: + fail("Unsigned longs must be bitwise (for now).") + +class PY_LONG_LONG_converter(CConverter): + type = 'PY_LONG_LONG' + default_type = int + format_unit = 'L' + c_ignored_default = "0" + +class unsigned_PY_LONG_LONG_converter(CConverter): + type = 'unsigned PY_LONG_LONG' + default_type = int + format_unit = 'K' + c_ignored_default = "0" + + def converter_init(self, *, bitwise=False): + if not bitwise: + fail("Unsigned PY_LONG_LONGs must be bitwise (for now).") + +class Py_ssize_t_converter(CConverter): + type = 'Py_ssize_t' + default_type = int + format_unit = 'n' + c_ignored_default = "0" + + +class float_converter(CConverter): + type = 'float' + default_type = float + format_unit = 'f' + c_ignored_default = "0.0" + +class double_converter(CConverter): + type = 'double' + default_type = float + format_unit = 'd' + c_ignored_default = "0.0" + + +class Py_complex_converter(CConverter): + type = 'Py_complex' + default_type = complex + format_unit = 'D' + c_ignored_default = "{0.0, 0.0}" + + +class object_converter(CConverter): + type = 'PyObject *' + format_unit = 'O' + + def converter_init(self, *, converter=None, type=None, subclass_of=None): + if converter: + if subclass_of: + fail("object: Cannot pass in both 'converter' and 'subclass_of'") + self.format_unit = 'O&' + self.converter = converter + elif subclass_of: + self.format_unit = 'O!' 
+ self.subclass_of = subclass_of + + if type is not None: + self.type = type + + +@add_legacy_c_converter('s#', length=True) +@add_legacy_c_converter('y', types="bytes") +@add_legacy_c_converter('y#', types="bytes", length=True) +@add_legacy_c_converter('z', nullable=True) +@add_legacy_c_converter('z#', nullable=True, length=True) +class str_converter(CConverter): + type = 'const char *' + default_type = (str, Null, NoneType) + format_unit = 's' + + def converter_init(self, *, encoding=None, types="str", + length=False, nullable=False, zeroes=False): + + types = set(types.strip().split()) + bytes_type = set(("bytes",)) + str_type = set(("str",)) + all_3_type = set(("bytearray",)) | bytes_type | str_type + is_bytes = types == bytes_type + is_str = types == str_type + is_all_3 = types == all_3_type + + self.length = bool(length) + format_unit = None + + if encoding: + self.encoding = encoding + + if is_str and not (length or zeroes or nullable): + format_unit = 'es' + elif is_all_3 and not (length or zeroes or nullable): + format_unit = 'et' + elif is_str and length and zeroes and not nullable: + format_unit = 'es#' + elif is_all_3 and length and not (nullable or zeroes): + format_unit = 'et#' + + # format_unit is still None when none of the combinations above matched; + # skip this check in that case so the "illegal combination" failure + # at the end of this method is what gets reported. + if format_unit and format_unit.endswith('#'): + fail("Sorry: code using format unit ", repr(format_unit), "probably doesn't work properly yet.\nGive Larry your test case and he'll fix it.") + # TODO set pointer to NULL + # TODO add cleanup for buffer + pass + + else: + if zeroes: + fail("str_converter: illegal combination of arguments (zeroes is only legal with an encoding)") + + if is_bytes and not (nullable or length): + format_unit = 'y' + elif is_bytes and length and not nullable: + format_unit = 'y#' + elif is_str and not (nullable or length): + format_unit = 's' + elif is_str and length and not nullable: + format_unit = 's#' + elif is_str and nullable and not length: + format_unit = 'z' + elif is_str and nullable and length: + format_unit = 'z#' + + if not format_unit: + 
fail("str_converter: illegal combination of arguments") + self.format_unit = format_unit + + +class PyBytesObject_converter(CConverter): + type = 'PyBytesObject *' + format_unit = 'S' + +class PyByteArrayObject_converter(CConverter): + type = 'PyByteArrayObject *' + format_unit = 'Y' + +class unicode_converter(CConverter): + type = 'PyObject *' + default_type = (str, Null, NoneType) + format_unit = 'U' + +@add_legacy_c_converter('u#', length=True) +@add_legacy_c_converter('Z', nullable=True) +@add_legacy_c_converter('Z#', nullable=True, length=True) +class Py_UNICODE_converter(CConverter): + type = 'Py_UNICODE *' + default_type = (str, Null, NoneType) + format_unit = 'u' + + def converter_init(self, *, nullable=False, length=False): + format_unit = 'Z' if nullable else 'u' + if length: + format_unit += '#' + self.length = True + self.format_unit = format_unit + +# +# We define three string conventions for buffer types in the 'types' argument: +# 'buffer' : any object supporting the buffer interface +# 'rwbuffer': any object supporting the buffer interface, but must be writeable +# 'robuffer': any object supporting the buffer interface, but must not be writeable +# +@add_legacy_c_converter('s*', types='str bytes bytearray buffer') +@add_legacy_c_converter('z*', types='str bytes bytearray buffer', nullable=True) +@add_legacy_c_converter('w*', types='bytearray rwbuffer') +class Py_buffer_converter(CConverter): + type = 'Py_buffer' + format_unit = 'y*' + impl_by_reference = True + c_ignored_default = "{NULL, NULL}" + + def converter_init(self, *, types='bytes bytearray buffer', nullable=False): + if self.default not in (unspecified, None): + fail("The only legal default value for Py_buffer is None.") + self.c_default = self.c_ignored_default + types = set(types.strip().split()) + bytes_type = set(('bytes',)) + bytearray_type = set(('bytearray',)) + buffer_type = set(('buffer',)) + rwbuffer_type = set(('rwbuffer',)) + robuffer_type = set(('robuffer',)) + str_type = 
set(('str',)) + bytes_bytearray_buffer_type = bytes_type | bytearray_type | buffer_type + + format_unit = None + if types == (str_type | bytes_bytearray_buffer_type): + format_unit = 's*' if not nullable else 'z*' + else: + if nullable: + fail('Py_buffer_converter: illegal combination of arguments (nullable=True)') + elif types == (bytes_bytearray_buffer_type): + format_unit = 'y*' + elif types == (bytearray_type | rwbuffer_type): + format_unit = 'w*' + if not format_unit: + fail("Py_buffer_converter: illegal combination of arguments") + + self.format_unit = format_unit + + def cleanup(self): + name = ensure_legal_c_identifier(self.name) + return "".join(["if (", name, ".obj)\n PyBuffer_Release(&", name, ");\n"]) + + +def correct_name_for_self(f): + if f.kind in (CALLABLE, METHOD_INIT): + if f.cls: + return "PyObject *", "self" + return "PyModuleDef *", "module" + if f.kind == STATIC_METHOD: + return "void *", "null" + if f.kind in (CLASS_METHOD, METHOD_NEW): + return "PyTypeObject *", "type" + raise RuntimeError("Unhandled type of function f: " + repr(f.kind)) + +def required_type_for_self_for_parser(f): + type, _ = correct_name_for_self(f) + if f.kind in (METHOD_INIT, METHOD_NEW, STATIC_METHOD, CLASS_METHOD): + return type + return None + + +class self_converter(CConverter): + """ + A special-case converter: + this is the default converter used for "self". 
+ """ + type = None + format_unit = '' + + def converter_init(self, *, type=None): + self.specified_type = type + + def pre_render(self): + f = self.function + default_type, default_name = correct_name_for_self(f) + self.signature_name = default_name + self.type = self.specified_type or self.type or default_type + + kind = self.function.kind + new_or_init = kind in (METHOD_NEW, METHOD_INIT) + + if (kind == STATIC_METHOD) or new_or_init: + self.show_in_signature = False + + # tp_new (METHOD_NEW) functions are of type newfunc: + # typedef PyObject *(*newfunc)(struct _typeobject *, PyObject *, PyObject *); + # PyTypeObject is a typedef for struct _typeobject. + # + # tp_init (METHOD_INIT) functions are of type initproc: + # typedef int (*initproc)(PyObject *, PyObject *, PyObject *); + # + # All other functions generated by Argument Clinic are stored in + # PyMethodDef structures, in the ml_meth slot, which is of type PyCFunction: + # typedef PyObject *(*PyCFunction)(PyObject *, PyObject *); + # However! We habitually cast these functions to PyCFunction, + # since functions that accept keyword arguments don't fit this signature + # but are stored there anyway. So strict type equality isn't important + # for these functions. + # + # So: + # + # * The name of the first parameter to the impl and the parsing function will always + # be self.name. + # + # * The type of the first parameter to the impl will always be of self.type. + # + # * If the function is neither tp_new (METHOD_NEW) nor tp_init (METHOD_INIT): + # * The type of the first parameter to the parsing function is also self.type. + # This means that if you step into the parsing function, your "self" parameter + # is of the correct type, which may make debugging more pleasant. + # + # * Else if the function is tp_new (METHOD_NEW): + # * The type of the first parameter to the parsing function is "PyTypeObject *", + # so the type signature of the function call is an exact match. 
+ # * If self.type != "PyTypeObject *", we cast the first parameter to self.type + # in the impl call. + # + # * Else if the function is tp_init (METHOD_INIT): + # * The type of the first parameter to the parsing function is "PyObject *", + # so the type signature of the function call is an exact match. + # * If self.type != "PyObject *", we cast the first parameter to self.type + # in the impl call. + + @property + def parser_type(self): + return required_type_for_self_for_parser(self.function) or self.type + + def render(self, parameter, data): + """ + parameter is a clinic.Parameter instance. + data is a CRenderData instance. + """ + if self.function.kind == STATIC_METHOD: + return + + self._render_self(parameter, data) + + if self.type != self.parser_type: + # insert cast to impl_argument[0], aka self. + # we know we're in the first slot in all the CRenderData lists, + # because we render parameters in order, and self is always first. + assert len(data.impl_arguments) == 1 + assert data.impl_arguments[0] == self.name + data.impl_arguments[0] = '(' + self.type + ")" + data.impl_arguments[0] + + def set_template_dict(self, template_dict): + template_dict['self_name'] = self.name + template_dict['self_type'] = self.parser_type + kind = self.function.kind + cls = self.function.cls + + if ((kind in (METHOD_NEW, METHOD_INIT)) and cls and cls.typedef): + if kind == METHOD_NEW: + passed_in_type = self.name + else: + passed_in_type = 'Py_TYPE({})'.format(self.name) + + line = '({passed_in_type} == {type_object}) &&\n ' + d = { + 'type_object': self.function.cls.type_object, + 'passed_in_type': passed_in_type + } + template_dict['self_type_check'] = line.format_map(d) + + + +def add_c_return_converter(f, name=None): + if not name: + name = f.__name__ + if not name.endswith('_return_converter'): + return f + name = name[:-len('_return_converter')] + return_converters[name] = f + return f + + +class CReturnConverterAutoRegister(type): + def __init__(cls, name, bases, 
classdict): + add_c_return_converter(cls) + +class CReturnConverter(metaclass=CReturnConverterAutoRegister): + + # The C type to use for this variable. + # 'type' should be a Python string specifying the type, e.g. "int". + # If this is a pointer type, the type string should end with ' *'. + type = 'PyObject *' + + # The Python default value for this parameter, as a Python value. + # Or the magic value "unspecified" if there is no default. + default = None + + def __init__(self, *, py_default=None, **kwargs): + self.py_default = py_default + try: + self.return_converter_init(**kwargs) + except TypeError as e: + s = ', '.join(name + '=' + repr(value) for name, value in kwargs.items()) + sys.exit(self.__class__.__name__ + '(' + s + ')\n' + str(e)) + + def return_converter_init(self): + pass + + def declare(self, data, name="_return_value"): + line = [] + add = line.append + add(self.type) + if not self.type.endswith('*'): + add(' ') + add(name + ';') + data.declarations.append(''.join(line)) + data.return_value = name + + def err_occurred_if(self, expr, data): + data.return_conversion.append('if (({}) && PyErr_Occurred())\n goto exit;\n'.format(expr)) + + def err_occurred_if_null_pointer(self, variable, data): + data.return_conversion.append('if ({} == NULL)\n goto exit;\n'.format(variable)) + + def render(self, function, data): + """ + function is a clinic.Function instance. + data is a CRenderData instance. 
+ """ + pass + +add_c_return_converter(CReturnConverter, 'object') + +class NoneType_return_converter(CReturnConverter): + def render(self, function, data): + self.declare(data) + data.return_conversion.append(''' +if (_return_value != Py_None) + goto exit; +return_value = Py_None; +Py_INCREF(Py_None); +'''.strip()) + +class bool_return_converter(CReturnConverter): + type = 'int' + + def render(self, function, data): + self.declare(data) + self.err_occurred_if("_return_value == -1", data) + data.return_conversion.append('return_value = PyBool_FromLong((long)_return_value);\n') + +class long_return_converter(CReturnConverter): + type = 'long' + conversion_fn = 'PyLong_FromLong' + cast = '' + + def render(self, function, data): + self.declare(data) + self.err_occurred_if("_return_value == -1", data) + data.return_conversion.append( + ''.join(('return_value = ', self.conversion_fn, '(', self.cast, '_return_value);\n'))) + +class int_return_converter(long_return_converter): + type = 'int' + cast = '(long)' + +class init_return_converter(long_return_converter): + """ + Special return converter for __init__ functions. 
+ """ + type = 'int' + cast = '(long)' + + def render(self, function, data): + pass + +class unsigned_long_return_converter(long_return_converter): + type = 'unsigned long' + conversion_fn = 'PyLong_FromUnsignedLong' + +class unsigned_int_return_converter(unsigned_long_return_converter): + type = 'unsigned int' + cast = '(unsigned long)' + +class Py_ssize_t_return_converter(long_return_converter): + type = 'Py_ssize_t' + conversion_fn = 'PyLong_FromSsize_t' + +class size_t_return_converter(long_return_converter): + type = 'size_t' + conversion_fn = 'PyLong_FromSize_t' + + +class double_return_converter(CReturnConverter): + type = 'double' + cast = '' + + def render(self, function, data): + self.declare(data) + self.err_occurred_if("_return_value == -1.0", data) + data.return_conversion.append( + 'return_value = PyFloat_FromDouble(' + self.cast + '_return_value);\n') + +class float_return_converter(double_return_converter): + type = 'float' + cast = '(double)' + + +class DecodeFSDefault_return_converter(CReturnConverter): + type = 'char *' + + def render(self, function, data): + self.declare(data) + self.err_occurred_if_null_pointer("_return_value", data) + data.return_conversion.append( + 'return_value = PyUnicode_DecodeFSDefault(_return_value);\n') + + +class IndentStack: + def __init__(self): + self.indents = [] + self.margin = None + + def _ensure(self): + if not self.indents: + fail('IndentStack expected indents, but none are defined.') + + def measure(self, line): + """ + Returns the length of the line's margin. + """ + if '\t' in line: + fail('Tab characters are illegal in the Argument Clinic DSL.') + stripped = line.lstrip() + if not len(stripped): + # we can't tell anything from an empty line + # so just pretend it's indented like our current indent + self._ensure() + return self.indents[-1] + return len(line) - len(stripped) + + def infer(self, line): + """ + Infer what is now the current margin based on this line. 
+ Returns: + 1 if we have indented (or this is the first margin) + 0 if the margin has not changed + -N if we have dedented N times + """ + indent = self.measure(line) + margin = ' ' * indent + if not self.indents: + self.indents.append(indent) + self.margin = margin + return 1 + current = self.indents[-1] + if indent == current: + return 0 + if indent > current: + self.indents.append(indent) + self.margin = margin + return 1 + # indent < current + if indent not in self.indents: + fail("Illegal outdent.") + outdent_count = 0 + while indent != current: + self.indents.pop() + current = self.indents[-1] + outdent_count -= 1 + self.margin = margin + return outdent_count + + @property + def depth(self): + """ + Returns how many margins are currently defined. + """ + return len(self.indents) + + def indent(self, line): + """ + Indents a line by the currently defined margin. + """ + return self.margin + line + + def dedent(self, line): + """ + Dedents a line by the currently defined margin. + (The inverse of 'indent'.) 
+ """ + margin = self.margin + indent = self.indents[-1] + if not line.startswith(margin): + fail('Cannot dedent, line does not start with the previous margin:') + return line[indent:] + + +class DSLParser: + def __init__(self, clinic): + self.clinic = clinic + + self.directives = {} + for name in dir(self): + # functions that start with directive_ are added to directives + _, s, key = name.partition("directive_") + if s: + self.directives[key] = getattr(self, name) + + # functions that start with at_ are too, with an @ in front + _, s, key = name.partition("at_") + if s: + self.directives['@' + key] = getattr(self, name) + + self.reset() + + def reset(self): + self.function = None + self.state = self.state_dsl_start + self.parameter_indent = None + self.keyword_only = False + self.group = 0 + self.parameter_state = self.ps_start + self.seen_positional_with_default = False + self.indent = IndentStack() + self.kind = CALLABLE + self.coexist = False + self.parameter_continuation = '' + self.preserve_output = False + + def directive_version(self, required): + global version + if version_comparitor(version, required) < 0: + fail("Insufficient Clinic version!\n Version: " + version + "\n Required: " + required) + + def directive_module(self, name): + fields = name.split('.') + new = fields.pop() + module, cls = self.clinic._module_and_class(fields) + if cls: + fail("Can't nest a module inside a class!") + + # modules are registered in module.modules (just below), + # so that is the mapping to check for a duplicate definition. + if name in module.modules: + fail("Already defined module " + repr(name) + "!") + + m = Module(name, module) + module.modules[name] = m + self.block.signatures.append(m) + + def directive_class(self, name, typedef, type_object): + fields = name.split('.') + in_classes = False + parent = self + name = fields.pop() + so_far = [] + module, cls = self.clinic._module_and_class(fields) + + parent = cls or module + if name in parent.classes: + fail("Already defined class " + repr(name) + "!") + + c = Class(name, module, cls, typedef, type_object) + parent.classes[name] = c 
+ self.block.signatures.append(c) + + def directive_set(self, name, value): + if name not in ("line_prefix", "line_suffix"): + fail("unknown variable", repr(name)) + + value = value.format_map({ + 'block comment start': '/*', + 'block comment end': '*/', + }) + + self.clinic.__dict__[name] = value + + def directive_destination(self, name, command, *args): + if command == 'new': + self.clinic.add_destination(name, *args) + return + + if command == 'clear': + self.clinic.get_destination(name).clear() + # 'clear' is a known command: stop here instead of + # falling through to the "unknown command" failure. + return + + fail("unknown destination command", repr(command)) + + + def directive_output(self, field, destination=''): + fd = self.clinic.field_destinations + + if field == "preset": + preset = self.clinic.presets.get(destination) + if not preset: + fail("Unknown preset " + repr(destination) + "!") + fd.update(preset) + return + + if field == "push": + self.clinic.field_destinations_stack.append(fd.copy()) + return + + if field == "pop": + if not self.clinic.field_destinations_stack: + fail("Can't 'output pop', stack is empty!") + previous_fd = self.clinic.field_destinations_stack.pop() + fd.update(previous_fd) + return + + # secret command for debugging! 
+ if field == "print": + self.block.output.append(pprint.pformat(fd)) + self.block.output.append('\n') + return + + d = self.clinic.get_destination(destination) + + if field == "everything": + for name in list(fd): + fd[name] = d + return + + if field not in fd: + fail("Invalid field " + repr(field) + ", must be one of:\n preset push pop print everything " + " ".join(fd)) + fd[field] = d + + def directive_dump(self, name): + self.block.output.append(self.clinic.get_destination(name).dump()) + + def directive_print(self, *args): + self.block.output.append(' '.join(args)) + self.block.output.append('\n') + + def directive_preserve(self): + if self.preserve_output: + fail("Can't have preserve twice in one block!") + self.preserve_output = True + + def at_classmethod(self): + if self.kind is not CALLABLE: + fail("Can't set @classmethod, function is not a normal callable") + self.kind = CLASS_METHOD + + def at_staticmethod(self): + if self.kind is not CALLABLE: + fail("Can't set @staticmethod, function is not a normal callable") + self.kind = STATIC_METHOD + + def at_coexist(self): + if self.coexist: + fail("Called @coexist twice!") + self.coexist = True + + def parse(self, block): + self.reset() + self.block = block + self.saved_output = self.block.output + block.output = [] + block_start = self.clinic.block_parser.line_number + lines = block.input.split('\n') + for line_number, line in enumerate(lines, self.clinic.block_parser.block_start_line_number): + if '\t' in line: + fail('Tab characters are illegal in the Clinic DSL.\n\t' + repr(line), line_number=block_start) + self.state(line) + + self.next(self.state_terminal) + self.state(None) + + block.output.extend(self.clinic.language.render(clinic, block.signatures)) + + if self.preserve_output: + if block.output: + fail("'preserve' only works for blocks that don't produce any output!") + block.output = self.saved_output + + @staticmethod + def ignore_line(line): + # ignore comment-only lines + if 
line.lstrip().startswith('#'): + return True + + # Ignore empty lines too + # (but not in docstring sections!) + if not line.strip(): + return True + + return False + + @staticmethod + def calculate_indent(line): + # only leading whitespace is indent; strip() would also + # count trailing whitespace towards the result. + return len(line) - len(line.lstrip()) + + def next(self, state, line=None): + # real_print(self.state.__name__, "->", state.__name__, ", line=", line) + self.state = state + if line is not None: + self.state(line) + + def state_dsl_start(self, line): + # self.block = self.ClinicOutputBlock(self) + if self.ignore_line(line): + return + + # is it a directive? + fields = shlex.split(line) + directive_name = fields[0] + directive = self.directives.get(directive_name, None) + if directive: + try: + directive(*fields[1:]) + except TypeError as e: + fail(str(e)) + return + + self.next(self.state_modulename_name, line) + + def state_modulename_name(self, line): + # looking for declaration, which establishes the leftmost column + # line should be + # modulename.fnname [as c_basename] [-> return annotation] + # square brackets denote optional syntax. + # + # alternatively: + # modulename.fnname [as c_basename] = modulename.existing_fn_name + # clones the parameters and return converter from that + # function. you can't modify them. you must enter a + # new docstring. + # + # (but we might find a directive first!) + # + # this line is permitted to start with whitespace. + # we'll call this number of spaces F (for "function"). + + if not line.strip(): + return + + self.indent.infer(line) + + # are we cloning? + before, equals, existing = line.rpartition('=') + if equals: + full_name, _, c_basename = before.partition(' as ') + full_name = full_name.strip() + c_basename = c_basename.strip() + existing = existing.strip() + if (is_legal_py_identifier(full_name) and + (not c_basename or is_legal_c_identifier(c_basename)) and + is_legal_py_identifier(existing)): + # we're cloning! 
+ fields = [x.strip() for x in existing.split('.')] + function_name = fields.pop() + module, cls = self.clinic._module_and_class(fields) + + for existing_function in (cls or module).functions: + if existing_function.name == function_name: + break + else: + existing_function = None + if not existing_function: + print("class", cls, "module", module, "existing", existing) + print("cls. functions", cls.functions) + fail("Couldn't find existing function " + repr(existing) + "!") + + fields = [x.strip() for x in full_name.split('.')] + function_name = fields.pop() + module, cls = self.clinic._module_and_class(fields) + + if not (existing_function.kind == self.kind and existing_function.coexist == self.coexist): + fail("'kind' of function and cloned function don't match! (@classmethod/@staticmethod/@coexist)") + self.function = existing_function.copy(name=function_name, full_name=full_name, module=module, cls=cls, c_basename=c_basename, docstring='') + + self.block.signatures.append(self.function) + (cls or module).functions.append(self.function) + self.next(self.state_function_docstring) + return + + line, _, returns = line.partition('->') + + full_name, _, c_basename = line.partition(' as ') + full_name = full_name.strip() + c_basename = c_basename.strip() or None + + if not is_legal_py_identifier(full_name): + fail("Illegal function name: {}".format(full_name)) + if c_basename and not is_legal_c_identifier(c_basename): + fail("Illegal C basename: {}".format(c_basename)) + + return_converter = None + if returns: + ast_input = "def x() -> {}: pass".format(returns) + module = None + try: + module = ast.parse(ast_input) + except SyntaxError: + pass + if not module: + fail("Badly-formed annotation for " + full_name + ": " + returns) + try: + name, legacy, kwargs = self.parse_converter(module.body[0].returns) + if legacy: + fail("Legacy converter {!r} not allowed as a return converter" + .format(name)) + if name not in return_converters: + fail("No available return converter 
called " + repr(name)) + return_converter = return_converters[name](**kwargs) + except ValueError: + fail("Badly-formed annotation for " + full_name + ": " + returns) + + fields = [x.strip() for x in full_name.split('.')] + function_name = fields.pop() + module, cls = self.clinic._module_and_class(fields) + + fields = full_name.split('.') + if fields[-1] == '__new__': + if (self.kind != CLASS_METHOD) or (not cls): + fail("__new__ must be a class method!") + self.kind = METHOD_NEW + elif fields[-1] == '__init__': + if (self.kind != CALLABLE) or (not cls): + fail("__init__ must be a normal method, not a class or static method!") + self.kind = METHOD_INIT + if not return_converter: + return_converter = init_return_converter() + elif fields[-1] in unsupported_special_methods: + fail(fields[-1] + " is a special method and cannot be converted to Argument Clinic! (Yet.)") + + if not return_converter: + return_converter = CReturnConverter() + + if not module: + fail("Undefined module used in declaration of " + repr(full_name.strip()) + ".") + self.function = Function(name=function_name, full_name=full_name, module=module, cls=cls, c_basename=c_basename, + return_converter=return_converter, kind=self.kind, coexist=self.coexist) + self.block.signatures.append(self.function) + + # insert a self converter automatically + type, name = correct_name_for_self(self.function) + kwargs = {} + if cls and type == "PyObject *": + kwargs['type'] = cls.typedef + sc = self.function.self_converter = self_converter(name, name, self.function, **kwargs) + p_self = Parameter(sc.name, inspect.Parameter.POSITIONAL_ONLY, function=self.function, converter=sc) + self.function.parameters[sc.name] = p_self + + (cls or module).functions.append(self.function) + self.next(self.state_parameters_start) + + # Now entering the parameters section. The rules, formally stated: + # + # * All lines must be indented with spaces only. + # * The first line must be a parameter declaration. 
+ # * The first line must be indented. + # * This first line establishes the indent for parameters. + # * We'll call this number of spaces P (for "parameter"). + # * Thenceforth: + # * Lines indented with P spaces specify a parameter. + # * Lines indented with > P spaces are docstrings for the previous + # parameter. + # * We'll call this number of spaces D (for "docstring"). + # * All subsequent lines indented with >= D spaces are stored as + # part of the per-parameter docstring. + # * All lines will have the first D spaces of the indent stripped + # before they are stored. + # * It's illegal to have a line starting with a number of spaces X + # such that P < X < D. + # * A line with < P spaces is the first line of the function + # docstring, which ends processing for parameters and per-parameter + # docstrings. + # * The first line of the function docstring must be at the same + # indent as the function declaration. + # * It's illegal to have any line in the parameters section starting + # with X spaces such that F < X < P. (As before, F is the indent + # of the function declaration.) + # + # Also, currently Argument Clinic places the following restrictions on groups: + # * Each group must contain at least one parameter. + # * Each group may contain at most one group, which must be the furthest + # thing in the group from the required parameters. (The nested group + # must be the first in the group when it's before the required + # parameters, and the last thing in the group when after the required + # parameters.) + # * There may be at most one (top-level) group to the left or right of + # the required parameters. + # * You must specify a slash, and it must be after all parameters. + # (In other words: either all parameters are positional-only, + # or none are.) + # + # Said another way: + # * Each group must contain at least one parameter. + # * All left square brackets before the required parameters must be + # consecutive. 
# Numeric ids for the parameter-section states (0..7), in the order listed
# in the state table of the comment block above.
ps_start, ps_left_square_before, ps_group_before, ps_required, \
ps_optional, ps_group_after, ps_right_square_after, ps_seen_slash = range(8)


def state_parameters_start(self, line):
    """
    Handle the first line after the function declaration.

    Ignorable lines are dropped.  An indented line opens the parameters
    section (and resets the pending backslash continuation); any other
    line is handed to the function-docstring state, since the function
    then has no parameters.
    """
    if self.ignore_line(line):
        return None

    if self.indent.infer(line):
        # indented: this is the first parameter line
        self.parameter_continuation = ''
        target = self.state_parameter
    else:
        # not indented: we have no parameters
        target = self.state_function_docstring
    return self.next(target, line)


def to_required(self):
    """
    Transition to the "required" parameter state.

    On the first transition only, the group number of every parameter
    seen so far is negated.
    """
    if self.parameter_state == self.ps_required:
        # already there; negating again would undo the renumbering
        return
    self.parameter_state = self.ps_required
    for param in self.function.parameters.values():
        param.group = -param.group
(Unexpected state " + str(self.parameter_state) + ".a)") + + # handle "as" for parameters too + c_name = None + name, have_as_token, trailing = line.partition(' as ') + if have_as_token: + name = name.strip() + if ' ' not in name: + fields = trailing.strip().split(' ') + if not fields: + fail("Invalid 'as' clause!") + c_name = fields[0] + if c_name.endswith(':'): + name += ':' + c_name = c_name[:-1] + fields[0] = name + line = ' '.join(fields) + + base, equals, default = line.rpartition('=') + if not equals: + base = default + default = None + + module = None + try: + ast_input = "def x({}): pass".format(base) + module = ast.parse(ast_input) + except SyntaxError: + try: + # the last = was probably inside a function call, like + # i: int(nullable=True) + # so assume there was no actual default value. + default = None + ast_input = "def x({}): pass".format(line) + module = ast.parse(ast_input) + except SyntaxError: + pass + if not module: + fail("Function " + self.function.name + " has an invalid parameter declaration:\n\t" + line) + + function_args = module.body[0].args + parameter = function_args.args[0] + + parameter_name = parameter.arg + name, legacy, kwargs = self.parse_converter(parameter.annotation) + + if not default: + if self.parameter_state == self.ps_optional: + fail("Can't have a parameter without a default (" + repr(parameter_name) + ")\nafter a parameter with a default!") + value = unspecified + if 'py_default' in kwargs: + fail("You can't specify py_default without specifying a default value!") + else: + if self.parameter_state == self.ps_required: + self.parameter_state = self.ps_optional + default = default.strip() + bad = False + ast_input = "x = {}".format(default) + bad = False + try: + module = ast.parse(ast_input) + + if 'c_default' not in kwargs: + # we can only represent very simple data values in C. + # detect whether default is okay, via a blacklist + # of disallowed ast nodes. 
+ class DetectBadNodes(ast.NodeVisitor): + bad = False + def bad_node(self, node): + self.bad = True + + # inline function call + visit_Call = bad_node + # inline if statement ("x = 3 if y else z") + visit_IfExp = bad_node + + # comprehensions and generator expressions + visit_ListComp = visit_SetComp = bad_node + visit_DictComp = visit_GeneratorExp = bad_node + + # literals for advanced types + visit_Dict = visit_Set = bad_node + visit_List = visit_Tuple = bad_node + + # "starred": "a = [1, 2, 3]; *a" + visit_Starred = bad_node + + # allow ellipsis, for now + # visit_Ellipsis = bad_node + + blacklist = DetectBadNodes() + blacklist.visit(module) + bad = blacklist.bad + else: + # if they specify a c_default, we can be more lenient about the default value. + # but at least make an attempt at ensuring it's a valid expression. + try: + value = eval(default) + if value == unspecified: + fail("'unspecified' is not a legal default value!") + except NameError: + pass # probably a named constant + except Exception as e: + fail("Malformed expression given as default value\n" + "{!r} caused {!r}".format(default, e)) + if bad: + fail("Unsupported expression as default value: " + repr(default)) + + expr = module.body[0].value + # mild hack: explicitly support NULL as a default value + if isinstance(expr, ast.Name) and expr.id == 'NULL': + value = NULL + py_default = 'None' + c_default = "NULL" + elif (isinstance(expr, ast.BinOp) or + (isinstance(expr, ast.UnaryOp) and not isinstance(expr.operand, ast.Num))): + c_default = kwargs.get("c_default") + if not (isinstance(c_default, str) and c_default): + fail("When you specify an expression (" + repr(default) + ") as your default value,\nyou MUST specify a valid c_default.") + py_default = default + value = unknown + elif isinstance(expr, ast.Attribute): + a = [] + n = expr + while isinstance(n, ast.Attribute): + a.append(n.attr) + n = n.value + if not isinstance(n, ast.Name): + fail("Unsupported default value " + repr(default) + " 
(looked like a Python constant)") + a.append(n.id) + py_default = ".".join(reversed(a)) + + c_default = kwargs.get("c_default") + if not (isinstance(c_default, str) and c_default): + fail("When you specify a named constant (" + repr(py_default) + ") as your default value,\nyou MUST specify a valid c_default.") + + try: + value = eval(py_default) + except NameError: + value = unknown + else: + value = ast.literal_eval(expr) + py_default = repr(value) + if isinstance(value, (bool, None.__class__)): + c_default = "Py_" + py_default + elif isinstance(value, str): + c_default = c_repr(value) + else: + c_default = py_default + + except SyntaxError as e: + fail("Syntax error: " + repr(e.text)) + except (ValueError, AttributeError): + value = unknown + c_default = kwargs.get("c_default") + py_default = default + if not (isinstance(c_default, str) and c_default): + fail("When you specify a named constant (" + repr(py_default) + ") as your default value,\nyou MUST specify a valid c_default.") + + kwargs.setdefault('c_default', c_default) + kwargs.setdefault('py_default', py_default) + + dict = legacy_converters if legacy else converters + legacy_str = "legacy " if legacy else "" + if name not in dict: + fail('{} is not a valid {}converter'.format(name, legacy_str)) + # if you use a c_name for the parameter, we just give that name to the converter + # but the parameter object gets the python name + converter = dict[name](c_name or parameter_name, parameter_name, self.function, value, **kwargs) + + kind = inspect.Parameter.KEYWORD_ONLY if self.keyword_only else inspect.Parameter.POSITIONAL_OR_KEYWORD + + if isinstance(converter, self_converter): + if len(self.function.parameters) == 1: + if (self.parameter_state != self.ps_required): + fail("A 'self' parameter cannot be marked optional.") + if value is not unspecified: + fail("A 'self' parameter cannot have a default value.") + if self.group: + fail("A 'self' parameter cannot be in an optional group.") + kind = 
def parse_converter(self, annotation):
    """
    Decode an annotation AST node into a converter specification.

    Returns a 3-tuple (name, legacy, kwargs):
      * a bare name       -> (identifier, False, {})
      * a call            -> (callee name, False, {keyword: literal value})
      * a string literal  -> (string text, True, {})   (legacy converter)

    Any other node, including no annotation at all (None), is reported
    through fail().
    """
    if isinstance(annotation, ast.Name):
        return annotation.id, False, {}

    if isinstance(annotation, ast.Call):
        if not isinstance(annotation.func, ast.Name):
            # e.g. "x: mod.conv()" -- there is no plain identifier to
            # look up, so report it instead of crashing on ".id".
            fail("Annotations must be either a name, a function call, or a string.")
        name = annotation.func.id
        kwargs = {node.arg: ast.literal_eval(node.value) for node in annotation.keywords}
        return name, False, kwargs

    # String literals: since Python 3.8 the parser produces ast.Constant;
    # ast.Str is deprecated and is removed in 3.14.  Look both up lazily
    # so this works on every interpreter.
    constant = getattr(ast, 'Constant', None)
    if constant is not None and isinstance(annotation, constant):
        if isinstance(annotation.value, str):
            return annotation.value, True, {}
    else:
        legacy_str = getattr(ast, 'Str', None)
        if legacy_str is not None and isinstance(annotation, legacy_str):
            return annotation.s, True, {}

    # NOTE(review): fail() is assumed not to return -- confirm.
    fail("Annotations must be either a name, a function call, or a string.")
(Unexpected state " + str(self.parameter_state) + ".b)") + self.group += 1 + self.function.docstring_only = True + elif symbol == ']': + if not self.group: + fail("Function " + self.function.name + " has a ] without a matching [.") + if not any(p.group == self.group for p in self.function.parameters.values()): + fail("Function " + self.function.name + " has an empty group.\nAll groups must contain at least one parameter.") + self.group -= 1 + if self.parameter_state in (self.ps_left_square_before, self.ps_group_before): + self.parameter_state = self.ps_group_before + elif self.parameter_state in (self.ps_group_after, self.ps_right_square_after): + self.parameter_state = self.ps_right_square_after + else: + fail("Function " + self.function.name + " has an unsupported group configuration. (Unexpected state " + str(self.parameter_state) + ".c)") + elif symbol == '/': + # ps_required and ps_optional are allowed here, that allows positional-only without option groups + # to work (and have default values!) + if (self.parameter_state not in (self.ps_required, self.ps_optional, self.ps_right_square_after, self.ps_group_before)) or self.group: + fail("Function " + self.function.name + " has an unsupported group configuration. 
def state_parameter_docstring_start(self, line):
    """
    Enter the per-parameter docstring state.

    Records the width of the current indent margin as the minimum indent
    of the docstring, then re-dispatches the same line to
    state_parameter_docstring.
    """
    margin_width = len(self.indent.margin)
    self.parameter_docstring_indent = margin_width
    assert self.indent.depth == 3
    return self.next(self.state_parameter_docstring, line)


# every line of the docstring must start with at least F spaces,
# where F > P.
# these F spaces will be stripped.
def state_parameter_docstring(self, line):
    """
    Consume one line of a per-parameter docstring.

    Comment lines are skipped.  A line indented less than the recorded
    docstring indent ends the docstring and is re-dispatched to the
    parameter state (depth 2) or the function-docstring state (depth 1).
    Otherwise the dedented line is appended to the docstring of the most
    recently added parameter.
    """
    text = line.strip()
    if text.startswith('#'):
        return

    if self.indent.measure(line) < self.parameter_docstring_indent:
        # we outdented: find out how far
        self.indent.infer(line)
        depth = self.indent.depth
        assert depth < 3
        if depth == 2:
            # back to a parameter
            return self.next(self.state_parameter, line)
        assert depth == 1
        return self.next(self.state_function_docstring, line)

    assert self.function.parameters
    owner = next(reversed(list(self.function.parameters.values())))

    accumulated = owner.docstring
    if accumulated:
        accumulated += '\n'
    if text:
        accumulated += self.indent.dedent(line)
    owner.docstring = accumulated
def state_function_docstring(self, line):
    """
    Append one line to the function's docstring.

    Comment lines (first non-blank character '#') are skipped.  Blank
    lines are stored as empty lines; other lines are dedented and
    stripped of trailing whitespace.  Lines are joined with a newline.
    """
    if self.group:
        # self.group is incremented for every '[' and decremented for
        # every ']' in parse_special_symbol, so a non-zero value on
        # reaching the docstring means a '[' was never closed.  (The
        # opposite error, a stray ']', is reported there.)
        fail("Function " + self.function.name + " has a [ without a matching ].")

    stripped = line.strip()
    if stripped.startswith('#'):
        return

    new_docstring = self.function.docstring
    if new_docstring:
        new_docstring += "\n"
    if stripped:
        new_docstring += self.indent.dedent(line).rstrip()
    self.function.docstring = new_docstring
+ for p in parameters_after_self: + p.right_bracket_count = 0 + + right_bracket_count = 0 + + def fix_right_bracket_count(desired): + nonlocal right_bracket_count + s = '' + while right_bracket_count < desired: + s += '[' + right_bracket_count += 1 + while right_bracket_count > desired: + s += ']' + right_bracket_count -= 1 + return s + + need_slash = False + added_slash = False + need_a_trailing_slash = False + + # we only need a trailing slash: + # * if this is not a "docstring_only" signature + # * and if the last *shown* parameter is + # positional only + if not f.docstring_only: + for p in reversed(parameters): + if not p.converter.show_in_signature: + continue + if p.is_positional_only(): + need_a_trailing_slash = True + break + + + added_star = False + + first_parameter = True + last_p = parameters[-1] + line_length = len(''.join(text)) + indent = " " * line_length + def add_parameter(text): + nonlocal line_length + nonlocal first_parameter + if first_parameter: + s = text + first_parameter = False + else: + s = ' ' + text + if line_length + len(s) >= 72: + add('\n') + add(indent) + line_length = len(indent) + s = text + line_length += len(s) + add(s) + + for p in parameters: + if not p.converter.show_in_signature: + continue + assert p.name + + is_self = isinstance(p.converter, self_converter) + if is_self and f.docstring_only: + # this isn't a real machine-parsable signature, + # so let's not print the "self" parameter + continue + + if p.is_positional_only(): + need_slash = not f.docstring_only + elif need_slash and not (added_slash or p.is_positional_only()): + added_slash = True + add_parameter('/,') + + if p.is_keyword_only() and not added_star: + added_star = True + add_parameter('*,') + + p_add, p_output = text_accumulator() + p_add(fix_right_bracket_count(p.right_bracket_count)) + + if isinstance(p.converter, self_converter): + # annotate first parameter as being a "self". 
+ # + # if inspect.Signature gets this function, + # and it's already bound, the self parameter + # will be stripped off. + # + # if it's not bound, it should be marked + # as positional-only. + # + # note: we don't print "self" for __init__, + # because this isn't actually the signature + # for __init__. (it can't be, __init__ doesn't + # have a docstring.) if this is an __init__ + # (or __new__), then this signature is for + # calling the class to contruct a new instance. + p_add('$') + + name = p.converter.signature_name or p.name + p_add(name) + + if p.converter.is_optional(): + p_add('=') + value = p.converter.py_default + if not value: + value = repr(p.converter.default) + p_add(value) + + if (p != last_p) or need_a_trailing_slash: + p_add(',') + + add_parameter(p_output()) + + add(fix_right_bracket_count(0)) + if need_a_trailing_slash: + add_parameter('/') + add(')') + + # PEP 8 says: + # + # The Python standard library will not use function annotations + # as that would result in a premature commitment to a particular + # annotation style. Instead, the annotations are left for users + # to discover and experiment with useful annotation styles. + # + # therefore this is commented out: + # + # if f.return_converter.py_default: + # add(' -> ') + # add(f.return_converter.py_default) + + if not f.docstring_only: + add("\n--\n") + + docstring_first_line = output() + + # now fix up the places where the brackets look wrong + docstring_first_line = docstring_first_line.replace(', ]', ',] ') + + # okay. now we're officially building the "parameters" section. 
+ # create substitution text for {parameters} + spacer_line = False + for p in parameters: + if not p.docstring.strip(): + continue + if spacer_line: + add('\n') + else: + spacer_line = True + add(" ") + add(p.name) + add('\n') + add(textwrap.indent(rstrip_lines(p.docstring.rstrip()), " ")) + parameters = output() + if parameters: + parameters += '\n' + + ## + ## docstring body + ## + + docstring = f.docstring.rstrip() + lines = [line.rstrip() for line in docstring.split('\n')] + + # Enforce the summary line! + # The first line of a docstring should be a summary of the function. + # It should fit on one line (80 columns? 79 maybe?) and be a paragraph + # by itself. + # + # Argument Clinic enforces the following rule: + # * either the docstring is empty, + # * or it must have a summary line. + # + # Guido said Clinic should enforce this: + # http://mail.python.org/pipermail/python-dev/2013-June/127110.html + + if len(lines) >= 2: + if lines[1]: + fail("Docstring for " + f.full_name + " does not have a summary line!\n" + + "Every non-blank function docstring must start with\n" + + "a single line summary followed by an empty line.") + elif len(lines) == 1: + # the docstring is only one line right now--the summary line. + # add an empty line after the summary line so we have space + # between it and the {parameters} we're about to add. 
def state_terminal(self, line):
    """
    Called when processing the block is done.

    Does nothing if no function was declared.  Otherwise it checks that
    a '*' marker is followed by at least one keyword-only parameter,
    strips trailing whitespace from every parameter docstring, and
    replaces the function docstring with the output of format_docstring().
    """
    assert not line

    if not self.function:
        return

    if self.keyword_only:
        declared = list(self.function.parameters.values())
        # '*' is only meaningful if the last parameter is keyword-only
        star_is_dangling = (
            not declared
            or declared[-1].kind != inspect.Parameter.KEYWORD_ONLY
        )
        if star_is_dangling:
            fail("Function " + self.function.name + " specifies '*' without any parameters afterwards.")

    # remove trailing whitespace from all parameter docstrings
    for param in self.function.parameters.values():
        if param:
            param.docstring = param.docstring.rstrip()

    self.function.docstring = self.format_docstring()
def main(argv):
    """
    Command-line entry point.

    argv is the list of command-line arguments without the program name.

    Modes:
      --converters  print the legacy converters, converters and return
                    converters found in this module's globals, then
                    exit with status 0.
      --make        walk the current directory (skipping .svn, .git, .hg
                    and build) and call parse_file() on every .c/.h file.
      (default)     call parse_file() on each filename given.

    Usage errors print a message plus the argparse usage line and exit
    with status -1.  Returns None otherwise.
    """
    import sys

    # Compare as a tuple: testing major and minor separately would
    # wrongly reject any future X.0 - X.2 release (minor < 3).
    if sys.version_info < (3, 3):
        sys.exit("Error: clinic.py requires Python 3.3 or greater.")

    import argparse
    cmdline = argparse.ArgumentParser()
    cmdline.add_argument("-f", "--force", action='store_true')
    cmdline.add_argument("-o", "--output", type=str)
    cmdline.add_argument("-v", "--verbose", action='store_true')
    cmdline.add_argument("--converters", action='store_true')
    cmdline.add_argument("--make", action='store_true')
    cmdline.add_argument("filename", type=str, nargs="*")
    ns = cmdline.parse_args(argv)

    if ns.converters:
        if ns.filename:
            print("Usage error: can't specify --converters and a filename at the same time.")
            print()
            cmdline.print_usage()
            sys.exit(-1)
        converters = []
        return_converters = []
        # registration helpers whose names merely look like converters
        ignored = set("""
            add_c_converter
            add_c_return_converter
            add_default_legacy_c_converter
            add_legacy_c_converter
            """.strip().split())
        module = globals()
        for name in module:
            if name in ignored:
                continue
            # "_return_converter" must be tried first: every such name
            # also ends with "_converter".
            for suffix, ids in (
                ("_return_converter", return_converters),
                ("_converter", converters),
            ):
                if name.endswith(suffix):
                    ids.append((name, name[:-len(suffix)]))
                    break
        print()

        print("Legacy converters:")
        legacy = sorted(legacy_converters)
        print(' ' + ' '.join(c for c in legacy if c[0].isupper()))
        print(' ' + ' '.join(c for c in legacy if c[0].islower()))
        print()

        for title, attribute, ids in (
            ("Converters", 'converter_init', converters),
            ("Return converters", 'return_converter_init', return_converters),
        ):
            print(title + ":")
            for name, short_name in sorted(ids, key=lambda x: x[1].lower()):
                cls = module[name]
                init = getattr(cls, attribute, None)
                if not init:
                    continue
                signature = inspect.signature(init)
                # only keyword-only parameters are listed
                parameters = []
                for parameter_name, parameter in signature.parameters.items():
                    if parameter.kind != inspect.Parameter.KEYWORD_ONLY:
                        continue
                    if parameter.default is not inspect.Parameter.empty:
                        s = '{}={!r}'.format(parameter_name, parameter.default)
                    else:
                        s = parameter_name
                    parameters.append(s)
                print(' {}({})'.format(short_name, ', '.join(parameters)))
            print()
        print("All converters also accept (c_default=None, py_default=None, annotation=None).")
        print("All return converters also accept (py_default=None).")
        sys.exit(0)

    if ns.make:
        if ns.output or ns.filename:
            print("Usage error: can't use -o or filenames with --make.")
            print()
            cmdline.print_usage()
            sys.exit(-1)
        skipped_dirs = ('.svn', '.git', '.hg', 'build')
        for root, dirs, files in os.walk('.'):
            # prune in place so os.walk does not descend into them
            dirs[:] = [d for d in dirs if d not in skipped_dirs]
            for filename in files:
                if not filename.endswith(('.c', '.h')):
                    continue
                path = os.path.join(root, filename)
                if ns.verbose:
                    print(path)
                parse_file(path, force=ns.force, verify=not ns.force)
        return

    if not ns.filename:
        cmdline.print_usage()
        sys.exit(-1)

    if ns.output and len(ns.filename) > 1:
        print("Usage error: can't use -o with multiple filenames.")
        print()
        cmdline.print_usage()
        sys.exit(-1)

    for filename in ns.filename:
        if ns.verbose:
            print(filename)
        parse_file(filename, output=ns.output, force=ns.force, verify=not ns.force)
class FakeConverter:
    """Stand-in converter that only remembers its name and keyword args."""

    def __init__(self, name, args):
        self.name, self.args = name, args


class FakeConverterFactory:
    """Callable that builds a FakeConverter carrying the factory's name."""

    def __init__(self, name):
        self.name = name

    def __call__(self, name, default, **kwargs):
        # the per-call name and default are accepted but not used
        return FakeConverter(self.name, kwargs)


class FakeConvertersDict:
    """Minimal mapping stand-in that invents a factory for any name."""

    def __init__(self):
        self.used_converters = {}

    def get(self, name, default):
        # 'default' is ignored: an unknown name gets a new factory, which
        # is remembered so later lookups return the same object
        registry = self.used_converters
        if name not in registry:
            registry[name] = FakeConverterFactory(name)
        return registry[name]
class ClinicWholeFileTest(TestCase):
    def test_eol(self):
        # Regression test.
        #
        # Clinic's block parser used to miss the block's "end line" when
        # that line did not end in "\n" (that is, when the last byte of
        # the file was '/').  It would then emit an end line of its own,
        # and because the input already had one, the last line of the
        # block came out corrupted.
        source = "/*[clinic]\nfoo\n[clinic]*/"
        parser = clinic.Clinic(clinic.CLanguage(None))
        output_lines = parser.parse(source).splitlines()
        last = output_lines[2].rstrip()
        # The first assertion is implied by the second; it is kept so the
        # shape of the original corruption stays on record.
        self.assertNotEqual(last, "[clinic]*/[clinic]*/")
        self.assertEqual(last, "[clinic]*/")
class InertParser:
    """Parser stub that accepts any block and leaves it untouched."""

    def __init__(self, clinic):
        """Accept the clinic argument; nothing is stored."""

    def parse(self, block):
        """Do nothing with the block."""


class CopyParser:
    """Parser stub that echoes a block's input as its output."""

    def __init__(self, clinic):
        """Accept the clinic argument; nothing is stored."""

    def parse(self, block):
        """Set block.output to block.input."""
        echoed = block.input
        block.output = echoed
c.parsers['copy'] = CopyParser(c) + computed = c.parse(input) + self.assertEqual(output, computed) + + def test_clinic_1(self): + self._test_clinic(""" + verbatim text here + lah dee dah +/*[copy input] +def +[copy start generated code]*/ +abc +/*[copy end generated code: output=03cfd743661f0797 input=7b18d017f89f61cf]*/ +xyz +""", """ + verbatim text here + lah dee dah +/*[copy input] +def +[copy start generated code]*/ +def +/*[copy end generated code: output=7b18d017f89f61cf input=7b18d017f89f61cf]*/ +xyz +""") + + +class ClinicParserTest(TestCase): + def test_trivial(self): + parser = DSLParser(FakeClinic()) + block = clinic.Block("module os\nos.access") + parser.parse(block) + module, function = block.signatures + self.assertEqual("access", function.name) + self.assertEqual("os", module.name) + + def test_ignore_line(self): + block = self.parse("#\nmodule os\nos.access") + module, function = block.signatures + self.assertEqual("access", function.name) + self.assertEqual("os", module.name) + + def test_param(self): + function = self.parse_function("module os\nos.access\n path: int") + self.assertEqual("access", function.name) + self.assertEqual(2, len(function.parameters)) + p = function.parameters['path'] + self.assertEqual('path', p.name) + self.assertIsInstance(p.converter, clinic.int_converter) + + def test_param_default(self): + function = self.parse_function("module os\nos.access\n follow_symlinks: bool = True") + p = function.parameters['follow_symlinks'] + self.assertEqual(True, p.default) + + def test_param_with_continuations(self): + function = self.parse_function("module os\nos.access\n follow_symlinks: \\\n bool \\\n =\\\n True") + p = function.parameters['follow_symlinks'] + self.assertEqual(True, p.default) + + def test_param_default_expression(self): + function = self.parse_function("module os\nos.access\n follow_symlinks: int(c_default='MAXSIZE') = sys.maxsize") + p = function.parameters['follow_symlinks'] + self.assertEqual(sys.maxsize, 
p.default) + self.assertEqual("MAXSIZE", p.converter.c_default) + + s = self.parse_function_should_fail("module os\nos.access\n follow_symlinks: int = sys.maxsize") + self.assertEqual(s, "Error on line 0:\nWhen you specify a named constant ('sys.maxsize') as your default value,\nyou MUST specify a valid c_default.\n") + + def test_param_no_docstring(self): + function = self.parse_function(""" +module os +os.access + follow_symlinks: bool = True + something_else: str = ''""") + p = function.parameters['follow_symlinks'] + self.assertEqual(3, len(function.parameters)) + self.assertIsInstance(function.parameters['something_else'].converter, clinic.str_converter) + + def test_param_default_parameters_out_of_order(self): + s = self.parse_function_should_fail(""" +module os +os.access + follow_symlinks: bool = True + something_else: str""") + self.assertEqual(s, """Error on line 0: +Can't have a parameter without a default ('something_else') +after a parameter with a default! +""") + + def disabled_test_converter_arguments(self): + function = self.parse_function("module os\nos.access\n path: path_t(allow_fd=1)") + p = function.parameters['path'] + self.assertEqual(1, p.converter.args['allow_fd']) + + def test_function_docstring(self): + function = self.parse_function(""" +module os +os.stat as os_stat_fn + + path: str + Path to be examined + +Perform a stat system call on the given path.""") + self.assertEqual(""" +stat($module, /, path) +-- + +Perform a stat system call on the given path. + + path + Path to be examined +""".strip(), function.docstring) + + def test_explicit_parameters_in_docstring(self): + function = self.parse_function(""" +module foo +foo.bar + x: int + Documentation for x. + y: int + +This is the documentation for foo. + +Okay, we're done here. +""") + self.assertEqual(""" +bar($module, /, x, y) +-- + +This is the documentation for foo. + + x + Documentation for x. + +Okay, we're done here. 
+""".strip(), function.docstring) + + def test_parser_regression_special_character_in_parameter_column_of_docstring_first_line(self): + function = self.parse_function(""" +module os +os.stat + path: str +This/used to break Clinic! +""") + self.assertEqual("stat($module, /, path)\n--\n\nThis/used to break Clinic!", function.docstring) + + def test_c_name(self): + function = self.parse_function("module os\nos.stat as os_stat_fn") + self.assertEqual("os_stat_fn", function.c_basename) + + def test_return_converter(self): + function = self.parse_function("module os\nos.stat -> int") + self.assertIsInstance(function.return_converter, clinic.int_return_converter) + + def test_star(self): + function = self.parse_function("module os\nos.access\n *\n follow_symlinks: bool = True") + p = function.parameters['follow_symlinks'] + self.assertEqual(inspect.Parameter.KEYWORD_ONLY, p.kind) + self.assertEqual(0, p.group) + + def test_group(self): + function = self.parse_function("module window\nwindow.border\n [\n ls : int\n ]\n /\n") + p = function.parameters['ls'] + self.assertEqual(1, p.group) + + def test_left_group(self): + function = self.parse_function(""" +module curses +curses.addch + [ + y: int + Y-coordinate. + x: int + X-coordinate. + ] + ch: char + Character to add. + [ + attr: long + Attributes for the character. + ] + / +""") + for name, group in ( + ('y', -1), ('x', -1), + ('ch', 0), + ('attr', 1), + ): + p = function.parameters[name] + self.assertEqual(p.group, group) + self.assertEqual(p.kind, inspect.Parameter.POSITIONAL_ONLY) + self.assertEqual(function.docstring.strip(), """ +addch([y, x,] ch, [attr]) + + + y + Y-coordinate. + x + X-coordinate. + ch + Character to add. + attr + Attributes for the character. + """.strip()) + + def test_nested_groups(self): + function = self.parse_function(""" +module curses +curses.imaginary + [ + [ + y1: int + Y-coordinate. + y2: int + Y-coordinate. + ] + x1: int + X-coordinate. + x2: int + X-coordinate. 
+ ] + ch: char + Character to add. + [ + attr1: long + Attributes for the character. + attr2: long + Attributes for the character. + attr3: long + Attributes for the character. + [ + attr4: long + Attributes for the character. + attr5: long + Attributes for the character. + attr6: long + Attributes for the character. + ] + ] + / +""") + for name, group in ( + ('y1', -2), ('y2', -2), + ('x1', -1), ('x2', -1), + ('ch', 0), + ('attr1', 1), ('attr2', 1), ('attr3', 1), + ('attr4', 2), ('attr5', 2), ('attr6', 2), + ): + p = function.parameters[name] + self.assertEqual(p.group, group) + self.assertEqual(p.kind, inspect.Parameter.POSITIONAL_ONLY) + + self.assertEqual(function.docstring.strip(), """ +imaginary([[y1, y2,] x1, x2,] ch, [attr1, attr2, attr3, [attr4, attr5, + attr6]]) + + + y1 + Y-coordinate. + y2 + Y-coordinate. + x1 + X-coordinate. + x2 + X-coordinate. + ch + Character to add. + attr1 + Attributes for the character. + attr2 + Attributes for the character. + attr3 + Attributes for the character. + attr4 + Attributes for the character. + attr5 + Attributes for the character. + attr6 + Attributes for the character. + """.strip()) + + def parse_function_should_fail(self, s): + with support.captured_stdout() as stdout: + with self.assertRaises(SystemExit): + self.parse_function(s) + return stdout.getvalue() + + def test_disallowed_grouping__two_top_groups_on_left(self): + s = self.parse_function_should_fail(""" +module foo +foo.two_top_groups_on_left + [ + group1 : int + ] + [ + group2 : int + ] + param: int + """) + self.assertEqual(s, + ('Error on line 0:\n' + 'Function two_top_groups_on_left has an unsupported group configuration. 
(Unexpected state 2.b)\n')) + + def test_disallowed_grouping__two_top_groups_on_right(self): + self.parse_function_should_fail(""" +module foo +foo.two_top_groups_on_right + param: int + [ + group1 : int + ] + [ + group2 : int + ] + """) + + def test_disallowed_grouping__parameter_after_group_on_right(self): + self.parse_function_should_fail(""" +module foo +foo.parameter_after_group_on_right + param: int + [ + [ + group1 : int + ] + group2 : int + ] + """) + + def test_disallowed_grouping__group_after_parameter_on_left(self): + self.parse_function_should_fail(""" +module foo +foo.group_after_parameter_on_left + [ + group2 : int + [ + group1 : int + ] + ] + param: int + """) + + def test_disallowed_grouping__empty_group_on_left(self): + self.parse_function_should_fail(""" +module foo +foo.empty_group + [ + [ + ] + group2 : int + ] + param: int + """) + + def test_disallowed_grouping__empty_group_on_right(self): + self.parse_function_should_fail(""" +module foo +foo.empty_group + param: int + [ + [ + ] + group2 : int + ] + """) + + def test_no_parameters(self): + function = self.parse_function(""" +module foo +foo.bar + +Docstring + +""") + self.assertEqual("bar($module, /)\n--\n\nDocstring", function.docstring) + self.assertEqual(1, len(function.parameters)) # self! 
+ + def test_init_with_no_parameters(self): + function = self.parse_function(""" +module foo +class foo.Bar "unused" "notneeded" +foo.Bar.__init__ + +Docstring + +""", signatures_in_block=3, function_index=2) + # self is not in the signature + self.assertEqual("Bar()\n--\n\nDocstring", function.docstring) + # but it *is* a parameter + self.assertEqual(1, len(function.parameters)) + + def test_illegal_module_line(self): + self.parse_function_should_fail(""" +module foo +foo.bar => int + / +""") + + def test_illegal_c_basename(self): + self.parse_function_should_fail(""" +module foo +foo.bar as 935 + / +""") + + def test_single_star(self): + self.parse_function_should_fail(""" +module foo +foo.bar + * + * +""") + + def test_parameters_required_after_star_without_initial_parameters_or_docstring(self): + self.parse_function_should_fail(""" +module foo +foo.bar + * +""") + + def test_parameters_required_after_star_without_initial_parameters_with_docstring(self): + self.parse_function_should_fail(""" +module foo +foo.bar + * +Docstring here. +""") + + def test_parameters_required_after_star_with_initial_parameters_without_docstring(self): + self.parse_function_should_fail(""" +module foo +foo.bar + this: int + * +""") + + def test_parameters_required_after_star_with_initial_parameters_and_docstring(self): + self.parse_function_should_fail(""" +module foo +foo.bar + this: int + * +Docstring. +""") + + def test_single_slash(self): + self.parse_function_should_fail(""" +module foo +foo.bar + / + / +""") + + def test_mix_star_and_slash(self): + self.parse_function_should_fail(""" +module foo +foo.bar + x: int + y: int + * + z: int + / +""") + + def test_parameters_not_permitted_after_slash_for_now(self): + self.parse_function_should_fail(""" +module foo +foo.bar + / + x: int +""") + + def test_function_not_at_column_0(self): + function = self.parse_function(""" + module foo + foo.bar + x: int + Nested docstring here, goeth. + * + y: str + Not at column 0! 
+""") + self.assertEqual(""" +bar($module, /, x, *, y) +-- + +Not at column 0! + + x + Nested docstring here, goeth. +""".strip(), function.docstring) + + def test_parser_regression_special_character_in_parameter_column_of_docstring_first_line(self): + function = self.parse_function(""" +module os +os.stat + path: str +This/used to break Clinic! +""") + self.assertEqual("stat($module, /, path)\n--\n\nThis/used to break Clinic!", function.docstring) + + def test_directive(self): + c = FakeClinic() + parser = DSLParser(c) + parser.flag = False + parser.directives['setflag'] = lambda : setattr(parser, 'flag', True) + block = clinic.Block("setflag") + parser.parse(block) + self.assertTrue(parser.flag) + + def test_legacy_converters(self): + block = self.parse('module os\nos.access\n path: "s"') + module, function = block.signatures + self.assertIsInstance((function.parameters['path']).converter, clinic.str_converter) + + def parse(self, text): + c = FakeClinic() + parser = DSLParser(c) + block = clinic.Block(text) + parser.parse(block) + return block + + def parse_function(self, text, signatures_in_block=2, function_index=1): + block = self.parse(text) + s = block.signatures + self.assertEqual(len(s), signatures_in_block) + assert isinstance(s[0], clinic.Module) + assert isinstance(s[function_index], clinic.Function) + return s[function_index] + + def test_scaffolding(self): + # test repr on special values + self.assertEqual(repr(clinic.unspecified), '<Unspecified>') + self.assertEqual(repr(clinic.NULL), '<Null>') + + # test that fail fails + with support.captured_stdout() as stdout: + with self.assertRaises(SystemExit): + clinic.fail('The igloos are melting!', filename='clown.txt', line_number=69) + self.assertEqual(stdout.getvalue(), 'Error in file "clown.txt" on line 69:\nThe igloos are melting!\n') + + +if __name__ == "__main__": + unittest.main() diff --git a/Tools/clinic/cpp.py b/Tools/clinic/cpp.py new file mode 100644 index 0000000..e099590 --- /dev/null +++ 
b/Tools/clinic/cpp.py @@ -0,0 +1,191 @@ +import re +import sys + +def negate(condition): + """ + Returns a CPP conditional that is the opposite of the conditional passed in. + """ + if condition.startswith('!'): + return condition[1:] + return "!" + condition + +class Monitor: + """ + A simple C preprocessor that scans C source and computes, line by line, + what the current C preprocessor #if state is. + + Doesn't handle everything--for example, if you have /* inside a C string, + without a matching */ (also inside a C string), or with a */ inside a C + string but on another line and with preprocessor macros in between... + the parser will get lost. + + Anyway this implementation seems to work well enough for the CPython sources. + """ + + is_a_simple_defined = re.compile(r'^defined\s*\(\s*[A-Za-z0-9_]+\s*\)$').match + + def __init__(self, filename=None, *, verbose=False): + self.stack = [] + self.in_comment = False + self.continuation = None + self.line_number = 0 + self.filename = filename + self.verbose = verbose + + def __repr__(self): + return ''.join(( + '<Monitor ', + str(id(self)), + " line=", str(self.line_number), + " condition=", repr(self.condition()), + ">")) + + def status(self): + return str(self.line_number).rjust(4) + ": " + self.condition() + + def condition(self): + """ + Returns the current preprocessor state, as a single #if condition. 
+ """ + return " && ".join(condition for token, condition in self.stack) + + def fail(self, *a): + if self.filename: + filename = " " + self.filename + else: + filename = '' + print("Error at" + filename, "line", self.line_number, ":") + print(" ", ' '.join(str(x) for x in a)) + sys.exit(-1) + + def close(self): + if self.stack: + self.fail("Ended file while still in a preprocessor conditional block!") + + def write(self, s): + for line in s.split("\n"): + self.writeline(line) + + def writeline(self, line): + self.line_number += 1 + line = line.strip() + + def pop_stack(): + if not self.stack: + self.fail("#" + token + " without matching #if / #ifdef / #ifndef!") + return self.stack.pop() + + if self.continuation: + line = self.continuation + line + self.continuation = None + + if not line: + return + + if line.endswith('\\'): + self.continuation = line[:-1].rstrip() + " " + return + + # we have to ignore preprocessor commands inside comments + # + # we also have to handle this: + # /* start + # ... + # */ /* <-- tricky! + # ... + # */ + # and this: + # /* start + # ... + # */ /* also tricky! */ + if self.in_comment: + if '*/' in line: + # snip out the comment and continue + # + # GCC allows + # /* comment + # */ #include <stdio.h> + # maybe other compilers too? 
+ _, _, line = line.partition('*/') + self.in_comment = False + + while True: + if '/*' in line: + if self.in_comment: + self.fail("Nested block comment!") + + before, _, remainder = line.partition('/*') + comment, comment_ends, after = remainder.partition('*/') + if comment_ends: + # snip out the comment + line = before.rstrip() + ' ' + after.lstrip() + continue + # comment continues to eol + self.in_comment = True + line = before.rstrip() + break + + # we actually have some // comments + # (but block comments take precedence) + before, line_comment, comment = line.partition('//') + if line_comment: + line = before.rstrip() + + if not line.startswith('#'): + return + + line = line[1:].lstrip() + assert line + + fields = line.split() + token = fields[0].lower() + condition = ' '.join(fields[1:]).strip() + + if_tokens = {'if', 'ifdef', 'ifndef'} + all_tokens = if_tokens | {'elif', 'else', 'endif'} + + if token not in all_tokens: + return + + # cheat a little here, to reuse the implementation of if + if token == 'elif': + pop_stack() + token = 'if' + + if token in if_tokens: + if not condition: + self.fail("Invalid format for #" + token + " line: no argument!") + if token == 'if': + if not self.is_a_simple_defined(condition): + condition = "(" + condition + ")" + else: + fields = condition.split() + if len(fields) != 1: + self.fail("Invalid format for #" + token + " line: should be exactly one argument!") + symbol = fields[0] + condition = 'defined(' + symbol + ')' + if token == 'ifndef': + condition = '!' 
+ condition + + self.stack.append(("if", condition)) + if self.verbose: + print(self.status()) + return + + previous_token, previous_condition = pop_stack() + + if token == 'else': + self.stack.append(('else', negate(previous_condition))) + elif token == 'endif': + pass + if self.verbose: + print(self.status()) + +if __name__ == '__main__': + for filename in sys.argv[1:]: + with open(filename, "rt") as f: + cpp = Monitor(filename, verbose=True) + print() + print(filename) + for line_number, line in enumerate(f.read().split('\n'), 1): + cpp.writeline(line) diff --git a/Tools/freeze/bkfile.py b/Tools/freeze/bkfile.py index 0e895f2..58246fa 100644 --- a/Tools/freeze/bkfile.py +++ b/Tools/freeze/bkfile.py @@ -7,11 +7,11 @@ class _BkFile: self.__backup = file + '~' try: os.unlink(self.__backup) - except os.error: + except OSError: pass try: os.rename(file, self.__backup) - except os.error: + except OSError: self.__backup = None self.__file = _orig_open(file, mode, bufsize) self.closed = self.__file.closed diff --git a/Tools/freeze/freeze.py b/Tools/freeze/freeze.py index a41267a..479ca3c 100755 --- a/Tools/freeze/freeze.py +++ b/Tools/freeze/freeze.py @@ -125,7 +125,7 @@ def main(): # default the exclude list for each platform if win: exclude = exclude + [ 'dos', 'dospath', 'mac', 'macpath', 'macfs', 'MACFS', 'posix', - 'os2', 'ce', + 'ce', ] fail_import = exclude[:] @@ -311,7 +311,7 @@ def main(): try: os.mkdir(odir) print("Created output directory", odir) - except os.error as msg: + except OSError as msg: usage('%s: mkdir failed (%s)' % (odir, str(msg))) base = '' if odir: diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index 1c2c3cb..2ad22e2 100755 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -939,21 +939,26 @@ class PyFrameObjectPtr(PyObjectPtr): class PySetObjectPtr(PyObjectPtr): _typename = 'PySetObject' + @classmethod + def _dummy_key(self): + return gdb.lookup_global_symbol('_PySet_Dummy').value() + + def __iter__(self): + dummy_ptr 
= self._dummy_key() + table = self.field('table') + for i in safe_range(self.field('mask') + 1): + setentry = table[i] + key = setentry['key'] + if key != 0 and key != dummy_ptr: + yield PyObjectPtr.from_pyobject_ptr(key) + def proxyval(self, visited): # Guard against infinite loops: if self.as_address() in visited: return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name()) visited.add(self.as_address()) - members = [] - table = self.field('table') - for i in safe_range(self.field('mask')+1): - setentry = table[i] - key = setentry['key'] - if key != 0: - key_proxy = PyObjectPtr.from_pyobject_ptr(key).proxyval(visited) - if key_proxy != '<dummy key>': - members.append(key_proxy) + members = (key.proxyval(visited) for key in self) if self.safe_tp_name() == 'frozenset': return frozenset(members) else: @@ -982,18 +987,11 @@ class PySetObjectPtr(PyObjectPtr): out.write('{') first = True - table = self.field('table') - for i in safe_range(self.field('mask')+1): - setentry = table[i] - key = setentry['key'] - if key != 0: - pyop_key = PyObjectPtr.from_pyobject_ptr(key) - key_proxy = pyop_key.proxyval(visited) # FIXME! - if key_proxy != '<dummy key>': - if not first: - out.write(', ') - first = False - pyop_key.write_repr(out, visited) + for key in self: + if not first: + out.write(', ') + first = False + key.write_repr(out, visited) out.write('}') if tp_name != 'set': @@ -1177,7 +1175,9 @@ class PyUnicodeObjectPtr(PyObjectPtr): # Convert the int code points to unicode characters, and generate a # local unicode instance. # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb). 
- result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs]) + result = u''.join([ + (_unichr(ucs) if ucs <= 0x10ffff else '\ufffd') + for ucs in Py_UNICODEs]) return result def write_repr(self, out, visited): diff --git a/Tools/i18n/makelocalealias.py b/Tools/i18n/makelocalealias.py index 8b92ae1..10887ce 100755 --- a/Tools/i18n/makelocalealias.py +++ b/Tools/i18n/makelocalealias.py @@ -7,6 +7,7 @@ """ import locale +import sys # Location of the alias file LOCALE_ALIAS = '/usr/share/X11/locale/locale.alias' @@ -65,9 +66,35 @@ def print_differences(data, olddata): (k, olddata[k], data[k])) # Additions are not mentioned +def optimize(data): + locale_alias = locale.locale_alias + locale.locale_alias = data.copy() + for k, v in data.items(): + del locale.locale_alias[k] + if locale.normalize(k) != v: + locale.locale_alias[k] = v + newdata = locale.locale_alias + errors = check(data) + locale.locale_alias = locale_alias + if errors: + sys.exit(1) + return newdata + +def check(data): + # Check that all alias definitions from the X11 file + # are actually mapped to the correct alias locales. 
+ errors = 0 + for k, v in data.items(): + if locale.normalize(k) != v: + print('ERROR: %a -> %a != %a' % (k, locale.normalize(k), v), + file=sys.stderr) + errors += 1 + return errors + if __name__ == '__main__': data = locale.locale_alias.copy() data.update(parse(LOCALE_ALIAS)) + data = optimize(data) print_differences(data, locale.locale_alias) print() print('locale_alias = {') diff --git a/Tools/i18n/msgfmt.py b/Tools/i18n/msgfmt.py index cd90691..b0751a1 100755 --- a/Tools/i18n/msgfmt.py +++ b/Tools/i18n/msgfmt.py @@ -218,7 +218,7 @@ def main(): if opt in ('-h', '--help'): usage(0) elif opt in ('-V', '--version'): - print("msgfmt.py", __version__, file=sys.stderr) + print("msgfmt.py", __version__) sys.exit(0) elif opt in ('-o', '--output-file'): outfile = arg diff --git a/Tools/importbench/importbench.py b/Tools/importbench/importbench.py index 714c0e4..635dd56 100644 --- a/Tools/importbench/importbench.py +++ b/Tools/importbench/importbench.py @@ -46,8 +46,7 @@ def from_cache(seconds, repeat): module.__package__ = '' with util.uncache(name): sys.modules[name] = module - for result in bench(name, repeat=repeat, seconds=seconds): - yield result + yield from bench(name, repeat=repeat, seconds=seconds) def builtin_mod(seconds, repeat): @@ -56,9 +55,8 @@ def builtin_mod(seconds, repeat): if name in sys.modules: del sys.modules[name] # Relying on built-in importer being implicit. 
- for result in bench(name, lambda: sys.modules.pop(name), repeat=repeat, - seconds=seconds): - yield result + yield from bench(name, lambda: sys.modules.pop(name), repeat=repeat, + seconds=seconds) def source_wo_bytecode(seconds, repeat): @@ -73,9 +71,8 @@ def source_wo_bytecode(seconds, repeat): loader = (importlib.machinery.SourceFileLoader, importlib.machinery.SOURCE_SUFFIXES, True) sys.path_hooks.append(importlib.machinery.FileFinder.path_hook(loader)) - for result in bench(name, lambda: sys.modules.pop(name), repeat=repeat, - seconds=seconds): - yield result + yield from bench(name, lambda: sys.modules.pop(name), repeat=repeat, + seconds=seconds) finally: sys.dont_write_bytecode = False @@ -89,9 +86,8 @@ def _wo_bytecode(module): os.unlink(bytecode_path) sys.dont_write_bytecode = True try: - for result in bench(name, lambda: sys.modules.pop(name), - repeat=repeat, seconds=seconds): - yield result + yield from bench(name, lambda: sys.modules.pop(name), + repeat=repeat, seconds=seconds) finally: sys.dont_write_bytecode = False @@ -127,8 +123,7 @@ def _writing_bytecode(module): def cleanup(): sys.modules.pop(name) os.unlink(imp.cache_from_source(module.__file__)) - for result in bench(name, cleanup, repeat=repeat, seconds=seconds): - yield result + yield from bench(name, cleanup, repeat=repeat, seconds=seconds) writing_bytecode_benchmark.__doc__ = ( writing_bytecode_benchmark.__doc__.format(name)) @@ -148,9 +143,8 @@ def source_using_bytecode(seconds, repeat): sys.path_hooks.append(importlib.machinery.FileFinder.path_hook(loader)) py_compile.compile(mapping[name]) assert os.path.exists(imp.cache_from_source(mapping[name])) - for result in bench(name, lambda: sys.modules.pop(name), repeat=repeat, - seconds=seconds): - yield result + yield from bench(name, lambda: sys.modules.pop(name), repeat=repeat, + seconds=seconds) def _using_bytecode(module): @@ -158,9 +152,8 @@ def _using_bytecode(module): def using_bytecode_benchmark(seconds, repeat): """Source w/ 
bytecode: {}""" py_compile.compile(module.__file__) - for result in bench(name, lambda: sys.modules.pop(name), repeat=repeat, - seconds=seconds): - yield result + yield from bench(name, lambda: sys.modules.pop(name), repeat=repeat, + seconds=seconds) using_bytecode_benchmark.__doc__ = ( using_bytecode_benchmark.__doc__.format(name)) diff --git a/Tools/iobench/iobench.py b/Tools/iobench/iobench.py index 530bc79..712e584 100644 --- a/Tools/iobench/iobench.py +++ b/Tools/iobench/iobench.py @@ -24,6 +24,8 @@ def text_open(fn, mode, encoding=None): try: return open(fn, mode, encoding=encoding or TEXT_ENCODING) except TypeError: + if 'r' in mode: + mode += 'U' # 'U' mode is needed only in Python 2.x return open(fn, mode) def get_file_sizes(): @@ -380,7 +382,7 @@ def prepare_files(): f.write(os.urandom(size)) # Text files chunk = [] - with text_open(__file__, "rU", encoding='utf8') as f: + with text_open(__file__, "r", encoding='utf8') as f: for line in f: if line.startswith("# <iobench text chunk marker>"): break diff --git a/Tools/msi/msi.py b/Tools/msi/msi.py index 5ed025d..8409b81 100644 --- a/Tools/msi/msi.py +++ b/Tools/msi/msi.py @@ -99,7 +99,9 @@ extensions = [ '_multiprocessing.pyd', '_lzma.pyd', '_decimal.pyd', - '_testbuffer.pyd' + '_testbuffer.pyd', + '_testimportmultiple.pyd', + '_overlapped.pyd', ] # Well-known component UUIDs @@ -119,6 +121,7 @@ pythondll_uuid = { "31":"{4afcba0b-13e4-47c3-bebe-477428b46913}", "32":"{3ff95315-1096-4d31-bd86-601d5438ad5e}", "33":"{f7581ca4-d368-4eea-8f82-d48c64c4f047}", + "34":"{7A0C5812-2583-40D9-BCBB-CD7485F11377}", } [major+minor] # Compute the name that Sphinx gives to the docfile @@ -417,6 +420,8 @@ def add_ui(db): compileargs = r'-Wi "[TARGETDIR]Lib\compileall.py" -f -x "bad_coding|badsyntax|site-packages|py2_|lib2to3\\tests|venv\\scripts" "[TARGETDIR]Lib"' lib2to3args = r'-c "import lib2to3.pygram, lib2to3.patcomp;lib2to3.patcomp.PatternCompiler()"' + updatepipargs = r'-m ensurepip -U' + removepipargs = r'-m 
ensurepip._uninstall' # See "CustomAction Table" add_data(db, "CustomAction", [ # msidbCustomActionTypeFirstSequence + msidbCustomActionTypeTextData + msidbCustomActionTypeProperty @@ -430,9 +435,13 @@ def add_ui(db): ("SetLauncherDirToWindows", 307, "LAUNCHERDIR", "[WindowsFolder]"), # msidbCustomActionTypeExe + msidbCustomActionTypeSourceFile # See "Custom Action Type 18" - ("CompilePyc", 18, "python.exe", compileargs), - ("CompilePyo", 18, "python.exe", "-O "+compileargs), - ("CompileGrammar", 18, "python.exe", lib2to3args), + # msidbCustomActionTypeInScript (1024); run during actual installation + # msidbCustomActionTypeNoImpersonate (2048); run action in system account, not user account + ("CompilePyc", 18+1024+2048, "python.exe", compileargs), + ("CompilePyo", 18+1024+2048, "python.exe", "-O "+compileargs), + ("CompileGrammar", 18+1024+2048, "python.exe", lib2to3args), + ("UpdatePip", 18+1024+2048, "python.exe", updatepipargs), + ("RemovePip", 18+1024+2048, "python.exe", removepipargs), ]) # UI Sequences, see "InstallUISequence Table", "Using a Sequence Table" @@ -459,7 +468,7 @@ def add_ui(db): # Prepend TARGETDIR to the system path, and remove it on uninstall. 
add_data(db, "Environment", - [("PathAddition", "=-*Path", "[TARGETDIR];[~]", "REGISTRY.path")]) + [("PathAddition", "=-*Path", "[TARGETDIR];[TARGETDIR]Scripts;[~]", "REGISTRY.path")]) # Execute Sequences add_data(db, "InstallExecuteSequence", @@ -469,17 +478,20 @@ def add_ui(db): ("SetLauncherDirToWindows", 'LAUNCHERDIR="" and ' + sys32cond, 753), ("SetLauncherDirToTarget", 'LAUNCHERDIR="" and not ' + sys32cond, 754), ("UpdateEditIDLE", None, 1050), - ("CompilePyc", "COMPILEALL", 6800), - ("CompilePyo", "COMPILEALL", 6801), - ("CompileGrammar", "COMPILEALL", 6802), + # run command if install state of pip changes to INSTALLSTATE_LOCAL + # run after InstallFiles + ("UpdatePip", "&pip_feature=3", 4001), + # remove pip when state changes to INSTALLSTATE_ABSENT + # run before RemoveFiles + ("RemovePip", "&pip_feature=2", 3499), + ("CompilePyc", "COMPILEALL", 4002), + ("CompilePyo", "COMPILEALL", 4003), + ("CompileGrammar", "COMPILEALL", 4004), ]) add_data(db, "AdminExecuteSequence", [("InitialTargetDir", 'TARGETDIR=""', 750), ("SetDLLDirToTarget", 'DLLDIR=""', 751), ("SetLauncherDirToTarget", 'LAUNCHERDIR=""', 752), - ("CompilePyc", "COMPILEALL", 6800), - ("CompilePyo", "COMPILEALL", 6801), - ("CompileGrammar", "COMPILEALL", 6802), ]) ##################################################################### @@ -748,7 +760,8 @@ def add_ui(db): advanced = PyDialog(db, "AdvancedDlg", x, y, w, h, modal, title, "CompilePyc", "Ok", "Ok") advanced.title("Advanced Options for [ProductName]") - # A radio group with two options: allusers, justme + + # A checkbox whether to build pyc files advanced.checkbox("CompilePyc", 135, 60, 230, 50, 3, "COMPILEALL", "Compile .py files to byte code after installation", "Ok") @@ -845,7 +858,8 @@ def add_features(db): # (i.e. additional Python libraries) need to follow the parent feature. # Features that have no advertisement trigger (e.g. 
the test suite) # must not support advertisement - global default_feature, tcltk, htmlfiles, tools, testsuite, ext_feature, private_crt, prepend_path + global default_feature, tcltk, htmlfiles, tools, testsuite + global ext_feature, private_crt, prepend_path, pip_feature default_feature = Feature(db, "DefaultFeature", "Python", "Python Interpreter and Libraries", 1, directory = "TARGETDIR") @@ -867,8 +881,14 @@ def add_features(db): tools = Feature(db, "Tools", "Utility Scripts", "Python utility scripts (Tools/)", 9, parent = default_feature, attributes=2) + # pip installation isn't enabled by default until a clean uninstall procedure + # becomes possible + pip_feature = Feature(db, "pip_feature", "pip", + "Install (or upgrade from an earlier version) pip, " + "a tool for installing and managing Python packages.", 11, + parent = default_feature, attributes=2|8) testsuite = Feature(db, "Testsuite", "Test suite", - "Python test suite (Lib/test/)", 11, + "Python test suite (Lib/test/)", 13, parent = default_feature, attributes=2|8) # prepend_path is an additional feature which is to be off by default. # Since the default level for the above features is 1, this needs to be @@ -876,7 +896,7 @@ def add_features(db): prepend_path = Feature(db, "PrependPath", "Add python.exe to Path", "Prepend [TARGETDIR] to the system Path variable. 
" "This allows you to type 'python' into a command " - "prompt without needing the full path.", 13, + "prompt without needing the full path.", 15, parent = default_feature, attributes=2|8, level=2) @@ -954,8 +974,6 @@ def add_files(db): # Add all executables, icons, text files into the TARGETDIR component root = PyDirectory(db, cab, None, srcdir, "TARGETDIR", "SourceDir") default_feature.set_current() - if not msilib.Win64: - root.add_file("%s/w9xpopen.exe" % PCBUILD) root.add_file("README.txt", src="README") root.add_file("NEWS.txt", src="Misc/NEWS") generate_license() @@ -1185,6 +1203,8 @@ def add_registry(db): "Documentation"), ("REGISTRY.path", msilib.gen_uuid(), "TARGETDIR", registry_component, None, None), + ("REGISTRY.ensurepip", msilib.gen_uuid(), "TARGETDIR", registry_component, "EnsurePipRun", + None), ("REGISTRY.def", msilib.gen_uuid(), "TARGETDIR", registry_component, None, None)] + tcldata) # See "FeatureComponents Table". @@ -1202,6 +1222,7 @@ def add_registry(db): [(default_feature.id, "REGISTRY"), (htmlfiles.id, "REGISTRY.doc"), (prepend_path.id, "REGISTRY.path"), + (pip_feature.id, "REGISTRY.ensurepip"), (ext_feature.id, "REGISTRY.def")] + tcldata ) @@ -1269,6 +1290,10 @@ def add_registry(db): "{60254CA5-953B-11CF-8C96-00AA00B8708C}", "REGISTRY.def"), ]) + # PATHEXT + add_data(db, "Environment", + [("PathExtAddition", "=-*PathExt", "[~];.PY", "REGISTRY.def")]) + # Registry keys prefix = r"Software\%sPython\PythonCore\%s" % (testprefix, short_version) add_data(db, "Registry", @@ -1284,7 +1309,9 @@ def add_registry(db): "", r"[TARGETDIR]Python.exe", "REGISTRY.def"), ("DisplayIcon", -1, r"Software\Microsoft\Windows\CurrentVersion\Uninstall\%s" % product_code, - "DisplayIcon", "[TARGETDIR]python.exe", "REGISTRY") + "DisplayIcon", "[TARGETDIR]python.exe", "REGISTRY"), + # Fake registry entry to allow installer to track whether ensurepip has been run + ("EnsurePipRun", -1, prefix+r"\EnsurePipRun", "", "#1", "REGISTRY.ensurepip"), ]) # Shortcuts, see 
"Shortcut Table" add_data(db, "Directory", diff --git a/Tools/parser/unparse.py b/Tools/parser/unparse.py index b55e2c6..837cd81 100644 --- a/Tools/parser/unparse.py +++ b/Tools/parser/unparse.py @@ -307,6 +307,9 @@ class Unparser: def _Name(self, t): self.write(t.id) + def _NameConstant(self, t): + self.write(repr(t.value)) + def _Num(self, t): # Substitute overflowing decimal literal for AST infinities. self.write(repr(t.n).replace("inf", INFSTR)) @@ -515,10 +518,10 @@ class Unparser: else: self.write(", ") self.write("*") if t.vararg: - self.write(t.vararg) - if t.varargannotation: + self.write(t.vararg.arg) + if t.vararg.annotation: self.write(": ") - self.dispatch(t.varargannotation) + self.dispatch(t.vararg.annotation) # keyword-only arguments if t.kwonlyargs: @@ -534,10 +537,10 @@ class Unparser: if t.kwarg: if first:first = False else: self.write(", ") - self.write("**"+t.kwarg) - if t.kwargannotation: + self.write("**"+t.kwarg.arg) + if t.kwarg.annotation: self.write(": ") - self.dispatch(t.kwargannotation) + self.dispatch(t.kwarg.annotation) def _keyword(self, t): self.write(t.arg) diff --git a/Tools/scripts/README b/Tools/scripts/README index d65d1fd..c6b2282 100644 --- a/Tools/scripts/README +++ b/Tools/scripts/README @@ -2,64 +2,68 @@ This directory contains a collection of executable Python scripts that are useful while building, extending or managing Python. Some (e.g., dutree or lll) are also generally useful UNIX tools. 
-2to3 Main script for running the 2to3 conversion tool -analyze_dxp.py Analyzes the result of sys.getdxp() -byext.py Print lines/words/chars stats of files by extension -byteyears.py Print product of a file's size and age -checkpyc.py Check presence and validity of ".pyc" files -cleanfuture.py Fix redundant Python __future__ statements -combinerefs.py A helper for analyzing PYTHONDUMPREFS output -copytime.py Copy one file's atime and mtime to another -crlf.py Change CRLF line endings to LF (Windows to Unix) -db2pickle.py Dump a database file to a pickle -diff.py Print file diffs in context, unified, or ndiff formats -dutree.py Format du(1) output as a tree sorted by size -eptags.py Create Emacs TAGS file for Python modules -find_recursionlimit.py Find the maximum recursion limit on this machine -finddiv.py A grep-like tool that looks for division operators -findlinksto.py Recursively find symbolic links to a given path prefix -findnocoding.py Find source files which need an encoding declaration -fixcid.py Massive identifier substitution on C source files -fixdiv.py Tool to fix division operators. -fixheader.py Add some cpp magic to a C include file -fixnotice.py Fix the copyright notice in source files -fixps.py Fix Python scripts' first line (if #!) 
-ftpmirror.py FTP mirror script -google.py Open a webbrowser with Google -gprof2html.py Transform gprof(1) output into useful HTML -h2py.py Translate #define's into Python assignments -highlight.py Python syntax highlighting with HTML output -idle3 Main program to start IDLE -ifdef.py Remove #if(n)def groups from C sources -lfcr.py Change LF line endings to CRLF (Unix to Windows) -linktree.py Make a copy of a tree with links to original files -lll.py Find and list symbolic links in current directory -mailerdaemon.py Parse error messages from mailer daemons (Sjoerd&Jack) -make_ctype.py Generate ctype.h replacement in stringobject.c -md5sum.py Print MD5 checksums of argument files -mkreal.py Turn a symbolic link into a real file or directory -ndiff.py Intelligent diff between text files (Tim Peters) -nm2def.py Create a template for PC/python_nt.def (Marc Lemburg) -objgraph.py Print object graph from nm output on a library -parseentities.py Utility for parsing HTML entity definitions -patchcheck.py Perform common checks and cleanup before committing -pathfix.py Change #!/usr/local/bin/python into something else -pdeps.py Print dependencies between Python modules -pickle2db.py Load a pickle generated by db2pickle.py to a database -pindent.py Indent Python code, giving block-closing comments -ptags.py Create vi tags file for Python modules -pydoc3 Python documentation browser -pysource.py Find Python source files -redemo.py Basic regular expression demonstration facility -reindent.py Change .py files to use 4-space indents -reindent-rst.py Fix-up reStructuredText file whitespace -rgrep.py Reverse grep through a file (useful for big logfiles) -run_tests.py Run the test suite with more sensible default options -serve.py Small wsgiref-based web server, used in make serve in Doc -suff.py Sort a list of files by suffix -svneol.py Set svn:eol-style on all files in directory -texi2html.py Convert GNU texinfo files into HTML -treesync.py Synchronize source trees (very 
idiosyncratic) -untabify.py Replace tabs with spaces in argument files -win_add2path.py Add Python to the search path on Windows -which.py Find a program in $PATH +2to3 Main script for running the 2to3 conversion tool +abitype.py Converts a C file to use the PEP 384 type definition API +analyze_dxp.py Analyzes the result of sys.getdxp() +byext.py Print lines/words/chars stats of files by extension +byteyears.py Print product of a file's size and age +checkpyc.py Check presence and validity of ".pyc" files +cleanfuture.py Fix redundant Python __future__ statements +combinerefs.py A helper for analyzing PYTHONDUMPREFS output +copytime.py Copy one file's atime and mtime to another +crlf.py Change CRLF line endings to LF (Windows to Unix) +db2pickle.py Dump a database file to a pickle +diff.py Print file diffs in context, unified, or ndiff formats +dutree.py Format du(1) output as a tree sorted by size +eptags.py Create Emacs TAGS file for Python modules +finddiv.py A grep-like tool that looks for division operators +findlinksto.py Recursively find symbolic links to a given path prefix +findnocoding.py Find source files which need an encoding declaration +find_recursionlimit.py Find the maximum recursion limit on this machine +find-uname.py Look for the given arguments in the sets of all Unicode names +fixcid.py Massive identifier substitution on C source files +fixdiv.py Tool to fix division operators. +fixheader.py Add some cpp magic to a C include file +fixnotice.py Fix the copyright notice in source files +fixps.py Fix Python scripts' first line (if #!) 
+ftpmirror.py FTP mirror script +get-remote-certificate.py Fetch the certificate that the server(s) are providing in PEM form +google.py Open a webbrowser with Google +gprof2html.py Transform gprof(1) output into useful HTML +h2py.py Translate #define's into Python assignments +highlight.py Python syntax highlighting with HTML output +idle3 Main program to start IDLE +ifdef.py Remove #if(n)def groups from C sources +import_diagnostics.py Miscellaneous diagnostics for the import system +lfcr.py Change LF line endings to CRLF (Unix to Windows) +linktree.py Make a copy of a tree with links to original files +lll.py Find and list symbolic links in current directory +mailerdaemon.py Parse error messages from mailer daemons (Sjoerd&Jack) +make_ctype.py Generate ctype.h replacement in stringobject.c +md5sum.py Print MD5 checksums of argument files +mkreal.py Turn a symbolic link into a real file or directory +ndiff.py Intelligent diff between text files (Tim Peters) +nm2def.py Create a template for PC/python_nt.def (Marc Lemburg) +objgraph.py Print object graph from nm output on a library +parseentities.py Utility for parsing HTML entity definitions +parse_html5_entities.py Utility for parsing HTML5 entity definitions +patchcheck.py Perform common checks and cleanup before committing +pathfix.py Change #!/usr/local/bin/python into something else +pdeps.py Print dependencies between Python modules +pickle2db.py Load a pickle generated by db2pickle.py to a database +pindent.py Indent Python code, giving block-closing comments +ptags.py Create vi tags file for Python modules +pydoc3 Python documentation browser +pysource.py Find Python source files +reindent.py Change .py files to use 4-space indents +reindent-rst.py Fix-up reStructuredText file whitespace +rgrep.py Reverse grep through a file (useful for big logfiles) +run_tests.py Run the test suite with more sensible default options +serve.py Small wsgiref-based web server, used in make serve in Doc +suff.py Sort a list 
of files by suffix +svneol.py Set svn:eol-style on all files in directory +texi2html.py Convert GNU texinfo files into HTML +treesync.py Synchronize source trees (very idiosyncratic) +untabify.py Replace tabs with spaces in argument files +which.py Find a program in $PATH +win_add2path.py Add Python to the search path on Windows diff --git a/Tools/scripts/byext.py b/Tools/scripts/byext.py index b79ff37..736a441 100755 --- a/Tools/scripts/byext.py +++ b/Tools/scripts/byext.py @@ -25,7 +25,7 @@ class Stats: self.addstats("<dir>", "dirs", 1) try: names = os.listdir(dir) - except os.error as err: + except OSError as err: sys.stderr.write("Can't list %s: %s\n" % (dir, err)) self.addstats("<dir>", "unlistable", 1) return diff --git a/Tools/scripts/byteyears.py b/Tools/scripts/byteyears.py index 490b37f..f58c346 100755 --- a/Tools/scripts/byteyears.py +++ b/Tools/scripts/byteyears.py @@ -43,7 +43,7 @@ def main(): for filename in sys.argv[1:]: try: st = statfunc(filename) - except os.error as msg: + except OSError as msg: sys.stderr.write("can't stat %r: %r\n" % (filename, msg)) status = 1 st = () diff --git a/Tools/scripts/checkpip.py b/Tools/scripts/checkpip.py new file mode 100755 index 0000000..8a64eda --- /dev/null +++ b/Tools/scripts/checkpip.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 +""" +Checks that the version of the projects bundled in ensurepip are the latest +versions available. 
+""" +import ensurepip +import json +import urllib.request +import sys + + +def main(): + outofdate = False + + for project, version in ensurepip._PROJECTS: + data = json.loads(urllib.request.urlopen( + "https://pypi.python.org/pypi/{}/json".format(project), + cadefault=True, + ).read().decode("utf8")) + upstream_version = data["info"]["version"] + + if version != upstream_version: + outofdate = True + print("The latest version of {} on PyPI is {}, but ensurepip " + "has {}".format(project, upstream_version, version)) + + if outofdate: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/Tools/scripts/checkpyc.py b/Tools/scripts/checkpyc.py index d4fdce2..bbaa3d1 100755 --- a/Tools/scripts/checkpyc.py +++ b/Tools/scripts/checkpyc.py @@ -5,11 +5,11 @@ import sys import os from stat import ST_MTIME -import imp +import importlib.util # PEP 3147 compatibility (PYC Repository Directories) -cache_from_source = (imp.cache_from_source if hasattr(imp, 'get_tag') else - lambda path: path + 'c') +cache_from_source = (importlib.util.cache_from_source if sys.implementation.cache_tag + else lambda path: path + 'c') def main(): @@ -18,13 +18,13 @@ def main(): silent = (sys.argv[1] == '-s') else: verbose = silent = False - MAGIC = imp.get_magic() + MAGIC = importlib.util.MAGIC_NUMBER if not silent: print('Using MAGIC word', repr(MAGIC)) for dirname in sys.path: try: names = os.listdir(dirname) - except os.error: + except OSError: print('Cannot list directory', repr(dirname)) continue if not silent: @@ -34,7 +34,7 @@ def main(): name = os.path.join(dirname, name) try: st = os.stat(name) - except os.error: + except OSError: print('Cannot stat', repr(name)) continue if verbose: diff --git a/Tools/scripts/copytime.py b/Tools/scripts/copytime.py index e0220b5..715683f 100755 --- a/Tools/scripts/copytime.py +++ b/Tools/scripts/copytime.py @@ -13,12 +13,12 @@ def main(): file1, file2 = sys.argv[1], sys.argv[2] try: stat1 = os.stat(file1) - except os.error: + except 
OSError: sys.stderr.write(file1 + ': cannot stat\n') sys.exit(1) try: os.utime(file2, (stat1[ST_ATIME], stat1[ST_MTIME])) - except os.error: + except OSError: sys.stderr.write(file2 + ': cannot change time\n') sys.exit(2) diff --git a/Tools/scripts/diff.py b/Tools/scripts/diff.py index f9b14bf..8be527f 100755 --- a/Tools/scripts/diff.py +++ b/Tools/scripts/diff.py @@ -38,9 +38,9 @@ def main(): fromdate = file_mtime(fromfile) todate = file_mtime(tofile) - with open(fromfile, 'U') as ff: + with open(fromfile) as ff: fromlines = ff.readlines() - with open(tofile, 'U') as tf: + with open(tofile) as tf: tolines = tf.readlines() if options.u: diff --git a/Tools/scripts/finddiv.py b/Tools/scripts/finddiv.py index f24a702..a705f56 100755 --- a/Tools/scripts/finddiv.py +++ b/Tools/scripts/finddiv.py @@ -70,7 +70,7 @@ def process(filename, listnames): def processdir(dir, listnames): try: names = os.listdir(dir) - except os.error as msg: + except OSError as msg: sys.stderr.write("Can't list directory: %s\n" % dir) return 1 files = [] diff --git a/Tools/scripts/findlinksto.py b/Tools/scripts/findlinksto.py index b4c09ef..b924f27 100755 --- a/Tools/scripts/findlinksto.py +++ b/Tools/scripts/findlinksto.py @@ -36,7 +36,7 @@ def visit(prog, dirname, names): linkto = os.readlink(name) if prog.search(linkto) is not None: print(name, '->', linkto) - except os.error: + except OSError: pass if __name__ == '__main__': diff --git a/Tools/scripts/fixcid.py b/Tools/scripts/fixcid.py index 87e2a09..1e4c428 100755 --- a/Tools/scripts/fixcid.py +++ b/Tools/scripts/fixcid.py @@ -97,7 +97,7 @@ def recursedown(dirname): bad = 0 try: names = os.listdir(dirname) - except os.error as msg: + except OSError as msg: err(dirname + ': cannot list directory: ' + str(msg) + '\n') return 1 names.sort() @@ -175,17 +175,17 @@ def fix(filename): try: statbuf = os.stat(filename) os.chmod(tempname, statbuf[ST_MODE] & 0o7777) - except os.error as msg: + except OSError as msg: err(tempname + ': warning: chmod 
failed (' + str(msg) + ')\n') # Then make a backup of the original file as filename~ try: os.rename(filename, filename + '~') - except os.error as msg: + except OSError as msg: err(filename + ': warning: backup failed (' + str(msg) + ')\n') # Now move the temp file to the original file try: os.rename(tempname, filename) - except os.error as msg: + except OSError as msg: err(filename + ': rename failed (' + str(msg) + ')\n') return 1 # Return success diff --git a/Tools/scripts/ftpmirror.py b/Tools/scripts/ftpmirror.py index 9e8be1d..a1b683a 100755 --- a/Tools/scripts/ftpmirror.py +++ b/Tools/scripts/ftpmirror.py @@ -108,7 +108,7 @@ def mirrorsubdir(f, localdir): if verbose: print('Creating local directory', repr(localdir)) try: makedir(localdir) - except os.error as msg: + except OSError as msg: print("Failed to establish local directory", repr(localdir)) return infofilename = os.path.join(localdir, '.mirrorinfo') @@ -183,7 +183,7 @@ def mirrorsubdir(f, localdir): continue try: os.unlink(tempname) - except os.error: + except OSError: pass if mode[0] == 'l': if verbose: @@ -218,11 +218,11 @@ def mirrorsubdir(f, localdir): fp1.close() try: os.unlink(fullname) - except os.error: + except OSError: pass # Ignore the error try: os.rename(tempname, fullname) - except os.error as msg: + except OSError as msg: print("Can't rename %r to %r: %s" % (tempname, fullname, msg)) continue info[filename] = infostuff @@ -255,7 +255,7 @@ def mirrorsubdir(f, localdir): try: if not localdir: names = os.listdir(os.curdir) else: names = os.listdir(localdir) - except os.error: + except OSError: names = [] for name in names: if name[0] == '.' 
or name in info or name in subdirs: @@ -312,7 +312,7 @@ def remove(fullname): if os.path.isdir(fullname) and not os.path.islink(fullname): try: names = os.listdir(fullname) - except os.error: + except OSError: names = [] ok = 1 for name in names: @@ -322,13 +322,13 @@ def remove(fullname): return 0 try: os.rmdir(fullname) - except os.error as msg: + except OSError as msg: print("Can't remove local directory %r: %s" % (fullname, msg)) return 0 else: try: os.unlink(fullname) - except os.error as msg: + except OSError as msg: print("Can't remove local file %r: %s" % (fullname, msg)) return 0 return 1 @@ -386,7 +386,7 @@ def writedict(dict, filename): backup = os.path.join(dir, fname + '~') try: os.unlink(backup) - except os.error: + except OSError: pass fp = open(tempname, 'w') fp.write('{\n') @@ -396,7 +396,7 @@ def writedict(dict, filename): fp.close() try: os.rename(filename, backup) - except os.error: + except OSError: pass os.rename(tempname, filename) diff --git a/Tools/scripts/linktree.py b/Tools/scripts/linktree.py index 982f480..e83f198 100755 --- a/Tools/scripts/linktree.py +++ b/Tools/scripts/linktree.py @@ -32,13 +32,13 @@ def main(): return 1 try: os.mkdir(newtree, 0o777) - except os.error as msg: + except OSError as msg: print(newtree + ': cannot mkdir:', msg) return 1 linkname = os.path.join(newtree, link) try: os.symlink(os.path.join(os.pardir, oldtree), linkname) - except os.error as msg: + except OSError as msg: if not link_may_fail: print(linkname + ': cannot symlink:', msg) return 1 @@ -51,7 +51,7 @@ def linknames(old, new, link): if debug: print('linknames', (old, new, link)) try: names = os.listdir(old) - except os.error as msg: + except OSError as msg: print(old + ': warning: cannot listdir:', msg) return for name in names: diff --git a/Tools/scripts/ndiff.py b/Tools/scripts/ndiff.py index 2422091..c6d09b8 100755 --- a/Tools/scripts/ndiff.py +++ b/Tools/scripts/ndiff.py @@ -60,7 +60,7 @@ def fail(msg): # couldn't be opened def fopen(fname): try: 
- return open(fname, 'U') + return open(fname) except IOError as detail: return fail("couldn't open " + fname + ": " + str(detail)) diff --git a/Tools/scripts/parse_html5_entities.py b/Tools/scripts/parse_html5_entities.py new file mode 100755 index 0000000..c011328 --- /dev/null +++ b/Tools/scripts/parse_html5_entities.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 +""" +Utility for parsing HTML5 entity definitions available from: + + http://dev.w3.org/html5/spec/entities.json + +Written by Ezio Melotti and Iuliia Proskurnia. + +""" + +import os +import sys +import json +from urllib.request import urlopen +from html.entities import html5 + +entities_url = 'http://dev.w3.org/html5/spec/entities.json' + +def get_json(url): + """Download the json file from the url and returns a decoded object.""" + with urlopen(url) as f: + data = f.read().decode('utf-8') + return json.loads(data) + +def create_dict(entities): + """Create the html5 dict from the decoded json object.""" + new_html5 = {} + for name, value in entities.items(): + new_html5[name.lstrip('&')] = value['characters'] + return new_html5 + +def compare_dicts(old, new): + """Compare the old and new dicts and print the differences.""" + added = new.keys() - old.keys() + if added: + print('{} entitie(s) have been added:'.format(len(added))) + for name in sorted(added): + print(' {!r}: {!r}'.format(name, new[name])) + removed = old.keys() - new.keys() + if removed: + print('{} entitie(s) have been removed:'.format(len(removed))) + for name in sorted(removed): + print(' {!r}: {!r}'.format(name, old[name])) + changed = set() + for name in (old.keys() & new.keys()): + if old[name] != new[name]: + changed.add((name, old[name], new[name])) + if changed: + print('{} entitie(s) have been modified:'.format(len(changed))) + for item in sorted(changed): + print(' {!r}: {!r} -> {!r}'.format(*item)) + +def write_items(entities, file=sys.stdout): + """Write the items of the dictionary in the specified file.""" + # The keys in the 
generated dictionary should be sorted + # in a case-insensitive way, however, when two keys are equal, + # the uppercase version should come first so that the result + # looks like: ['Aacute', 'aacute', 'Aacute;', 'aacute;', ...] + # To do this we first sort in a case-sensitive way (so all the + # uppercase chars come first) and then sort with key=str.lower. + # Since the sorting is stable the uppercase keys will eventually + # be before their equivalent lowercase version. + keys = sorted(entities.keys()) + keys = sorted(keys, key=str.lower) + print('html5 = {', file=file) + for name in keys: + print(' {!r}: {!a},'.format(name, entities[name]), file=file) + print('}', file=file) + + +if __name__ == '__main__': + # without args print a diff between html.entities.html5 and new_html5 + # with --create print the new html5 dict + # with --patch patch the Lib/html/entities.py file + new_html5 = create_dict(get_json(entities_url)) + if '--create' in sys.argv: + print('# map the HTML5 named character references to the ' + 'equivalent Unicode character(s)') + print('# Generated by {}. 
Do not edit manually.'.format(__file__)) + write_items(new_html5) + elif '--patch' in sys.argv: + fname = 'Lib/html/entities.py' + temp_fname = fname + '.temp' + with open(fname) as f1, open(temp_fname, 'w') as f2: + skip = False + for line in f1: + if line.startswith('html5 = {'): + write_items(new_html5, file=f2) + skip = True + continue + if skip: + # skip the old items until the } + if line.startswith('}'): + skip = False + continue + f2.write(line) + os.remove(fname) + os.rename(temp_fname, fname) + else: + if html5 == new_html5: + print('The current dictionary is updated.') + else: + compare_dicts(html5, new_html5) + print('Run "./python {0} --patch" to update Lib/html/entities.html ' + 'or "./python {0} --create" to see the generated ' 'dictionary.'.format(__file__)) diff --git a/Tools/scripts/pathfix.py b/Tools/scripts/pathfix.py index 13ca866..22432d1 100755 --- a/Tools/scripts/pathfix.py +++ b/Tools/scripts/pathfix.py @@ -73,7 +73,7 @@ def recursedown(dirname): bad = 0 try: names = os.listdir(dirname) - except os.error as msg: + except OSError as msg: err('%s: cannot list directory: %r\n' % (dirname, msg)) return 1 names.sort() @@ -131,24 +131,24 @@ def fix(filename): mtime = statbuf.st_mtime atime = statbuf.st_atime os.chmod(tempname, statbuf[ST_MODE] & 0o7777) - except os.error as msg: + except OSError as msg: err('%s: warning: chmod failed (%r)\n' % (tempname, msg)) # Then make a backup of the original file as filename~ try: os.rename(filename, filename + '~') - except os.error as msg: + except OSError as msg: err('%s: warning: backup failed (%r)\n' % (filename, msg)) # Now move the temp file to the original file try: os.rename(tempname, filename) - except os.error as msg: + except OSError as msg: err('%s: rename failed (%r)\n' % (filename, msg)) return 1 if preserve_timestamps: if atime and mtime: try: os.utime(filename, (atime, mtime)) - except os.error as msg: + except OSError as msg: err('%s: reset of timestamp failed (%r)\n' % (filename, msg)) 
return 1 # Return success diff --git a/Tools/scripts/pindent.py b/Tools/scripts/pindent.py index 2872dc0..3333420 100755 --- a/Tools/scripts/pindent.py +++ b/Tools/scripts/pindent.py @@ -376,13 +376,13 @@ def make_backup(filename): if os.path.lexists(backup): try: os.remove(backup) - except os.error: + except OSError: print("Can't remove backup %r" % (backup,), file=sys.stderr) # end try # end if try: os.rename(filename, backup) - except os.error: + except OSError: print("Can't rename %r to %r" % (filename, backup), file=sys.stderr) # end try # end def make_backup diff --git a/Tools/scripts/pydocgui.pyw b/Tools/scripts/pydocgui.pyw index 8e9a3d6..8e9a3d6 100644..100755 --- a/Tools/scripts/pydocgui.pyw +++ b/Tools/scripts/pydocgui.pyw diff --git a/Tools/scripts/reindent.py b/Tools/scripts/reindent.py index 4a916ea..18424de 100755 --- a/Tools/scripts/reindent.py +++ b/Tools/scripts/reindent.py @@ -52,8 +52,8 @@ verbose = False recurse = False dryrun = False makebackup = True +# A specified newline to be used in the output (set by --newline option) spec_newline = None -"""A specified newline to be used in the output (set by --newline option)""" def usage(msg=None): diff --git a/Tools/scripts/treesync.py b/Tools/scripts/treesync.py index b2649c4..652d394 100755 --- a/Tools/scripts/treesync.py +++ b/Tools/scripts/treesync.py @@ -78,7 +78,7 @@ def process(slave, master): print("creating slave directory", slave) try: os.mkdir(slave) - except os.error as msg: + except OSError as msg: print("can't make slave directory", slave, ":", msg) return else: diff --git a/Tools/scripts/untabify.py b/Tools/scripts/untabify.py index 4b67c15..861c83c 100755 --- a/Tools/scripts/untabify.py +++ b/Tools/scripts/untabify.py @@ -39,11 +39,11 @@ def process(filename, tabsize, verbose=True): backup = filename + "~" try: os.unlink(backup) - except os.error: + except OSError: pass try: os.rename(filename, backup) - except os.error: + except OSError: pass with open(filename, "w", 
encoding=encoding) as f: f.write(newtext) diff --git a/Tools/scripts/which.py b/Tools/scripts/which.py index 4fc37a0..df54ce0 100755 --- a/Tools/scripts/which.py +++ b/Tools/scripts/which.py @@ -29,7 +29,7 @@ def main(): filename = os.path.join(dir, prog) try: st = os.stat(filename) - except os.error: + except OSError: continue if not S_ISREG(st[ST_MODE]): msg(filename + ': not a disk file') diff --git a/Tools/scripts/win_add2path.py b/Tools/scripts/win_add2path.py index 9259b44..c85bea5 100644 --- a/Tools/scripts/win_add2path.py +++ b/Tools/scripts/win_add2path.py @@ -30,7 +30,7 @@ def modify(): with winreg.CreateKey(HKCU, ENV) as key: try: envpath = winreg.QueryValueEx(key, PATH)[0] - except WindowsError: + except OSError: envpath = DEFAULT paths = [envpath] diff --git a/Tools/ssl/test_multiple_versions.py b/Tools/ssl/test_multiple_versions.py new file mode 100644 index 0000000..dd57dcf --- /dev/null +++ b/Tools/ssl/test_multiple_versions.py @@ -0,0 +1,241 @@ +#./python +"""Run Python tests with multiple installations of OpenSSL + +The script + + (1) downloads OpenSSL tar bundle + (2) extracts it to ../openssl/src/openssl-VERSION/ + (3) compiles OpenSSL + (4) installs OpenSSL into ../openssl/VERSION/ + (5) forces a recompilation of Python modules using the + header and library files from ../openssl/VERSION/ + (6) runs Python's test suite + +The script must be run with Python's build directory as current working +directory: + + ./python Tools/ssl/test_multiple_versions.py + +The script uses LD_RUN_PATH, LD_LIBRARY_PATH, CPPFLAGS and LDFLAGS to bend +search paths for header files and shared libraries. It's known to work on +Linux with GCC 4.x. 
# Tail of the module docstring that opens before this chunk:
#     (c) 2013 Christian Heimes <christian@python.org>
import logging
import os
import tarfile
import shutil
import subprocess
import sys
from urllib.request import urlopen

log = logging.getLogger("multissl")

# OpenSSL releases to build and test against.
OPENSSL_VERSIONS = [
    "0.9.7m", "0.9.8i", "0.9.8l", "0.9.8m", "0.9.8y", "1.0.0k", "1.0.1e"
]
# Test modules run when no arguments are given on the command line.
FULL_TESTS = [
    "test_asyncio", "test_ftplib", "test_hashlib", "test_httplib",
    "test_imaplib", "test_nntplib", "test_poplib", "test_smtplib",
    "test_smtpnet", "test_urllib2_localnet", "test_venv"
]
MINIMAL_TESTS = ["test_ssl", "test_hashlib"]
# Passed to urlopen() as ``cadefault`` when downloading the sources.
CADEFAULT = True
HERE = os.path.abspath(os.getcwd())
# Sibling "openssl" directory next to the current working directory.
DEST_DIR = os.path.abspath(os.path.join(HERE, os.pardir, "openssl"))


class BuildSSL:
    """Download, build and install one OpenSSL version, then test Python.

    All paths hang off *destdir*: the tarball is stored in
    ``<destdir>/src/openssl-<version>.tar.gz``, unpacked and built in
    ``<destdir>/src/openssl-<version>`` and installed to
    ``<destdir>/<version>``.
    """

    url_template = "https://www.openssl.org/source/openssl-{}.tar.gz"

    # C sources that are touched to force a rebuild of the extension modules
    module_files = ["Modules/_ssl.c",
                    "Modules/socketmodule.c",
                    "Modules/_hashopenssl.c"]

    def __init__(self, version, openssl_compile_args=(), destdir=DEST_DIR):
        """Set up paths for OpenSSL *version*.

        *openssl_compile_args* are appended to the ``./config`` command
        line.  Raises ValueError when the current directory is not a
        Python build directory.
        """
        self._check_python_builddir()
        self.version = version
        self.openssl_compile_args = openssl_compile_args
        # installation directory
        self.install_dir = os.path.join(destdir, version)
        # source file
        self.src_file = os.path.join(destdir, "src",
                                     "openssl-{}.tar.gz".format(version))
        # build directory (removed after install)
        self.build_dir = os.path.join(destdir, "src",
                                      "openssl-{}".format(version))

    @property
    def openssl_cli(self):
        """openssl CLI binary"""
        return os.path.join(self.install_dir, "bin", "openssl")

    @property
    def openssl_version(self):
        """output of 'bin/openssl version'"""
        env = os.environ.copy()
        env["LD_LIBRARY_PATH"] = self.lib_dir
        cmd = [self.openssl_cli, "version"]
        return self._subprocess_output(cmd, env=env)

    @property
    def pyssl_version(self):
        """Value of ssl.OPENSSL_VERSION"""
        env = os.environ.copy()
        env["LD_LIBRARY_PATH"] = self.lib_dir
        cmd = ["./python", "-c", "import ssl; print(ssl.OPENSSL_VERSION)"]
        return self._subprocess_output(cmd, env=env)

    @property
    def include_dir(self):
        """Header directory of the installation"""
        return os.path.join(self.install_dir, "include")

    @property
    def lib_dir(self):
        """Library directory of the installation"""
        return os.path.join(self.install_dir, "lib")

    @property
    def has_openssl(self):
        """True if the openssl CLI binary is already installed"""
        return os.path.isfile(self.openssl_cli)

    @property
    def has_src(self):
        """True if the source tarball has already been downloaded"""
        return os.path.isfile(self.src_file)

    def _subprocess_call(self, cmd, stdout=subprocess.DEVNULL, env=None,
                         **kwargs):
        """Run *cmd* (a list) and raise CalledProcessError on failure.

        stdout is discarded by default; extra keyword arguments are
        handed to subprocess.check_call().
        """
        log.debug("Call '{}'".format(" ".join(cmd)))
        return subprocess.check_call(cmd, stdout=stdout, env=env, **kwargs)

    def _subprocess_output(self, cmd, env=None, **kwargs):
        """Run *cmd* (a list) and return its stripped, UTF-8 decoded stdout.

        Extra keyword arguments (for example ``cwd``) are handed to
        subprocess.check_output(); previously they were accepted but
        silently ignored.
        """
        log.debug("Call '{}'".format(" ".join(cmd)))
        out = subprocess.check_output(cmd, env=env, **kwargs)
        return out.strip().decode("utf-8")

    def _check_python_builddir(self):
        """Raise ValueError unless cwd holds ``python`` and ``setup.py``."""
        if not os.path.isfile("python") or not os.path.isfile("setup.py"):
            raise ValueError("Script must be run in Python build directory")

    def _download_openssl(self):
        """Download OpenSSL source dist"""
        src_dir = os.path.dirname(self.src_file)
        os.makedirs(src_dir, exist_ok=True)
        url = self.url_template.format(self.version)
        log.info("Downloading OpenSSL from {}".format(url))
        # NOTE(review): ``cadefault`` is deprecated in newer Python releases
        # in favour of ``context``; kept as-is to preserve behaviour.
        # KISS, read all, write all -- but close the response when done.
        with urlopen(url, cadefault=CADEFAULT) as req:
            data = req.read()
        log.info("Storing {}".format(self.src_file))
        with open(self.src_file, "wb") as f:
            f.write(data)

    def _unpack_openssl(self):
        """Unpack tar.gz bundle into a fresh, empty build directory.

        The leading ``openssl-<version>/`` component is stripped from
        every member.  Raises ValueError (with the member name) for any
        member that lives outside that top-level directory or that would
        escape the build directory.
        """
        # cleanup
        if os.path.isdir(self.build_dir):
            shutil.rmtree(self.build_dir)
        os.makedirs(self.build_dir)

        name = "openssl-{}".format(self.version)
        base = name + "/"
        # the archive is closed again even if validation or extraction fails
        with tarfile.open(self.src_file) as tf:
            # force extraction into build dir
            members = []
            for member in tf.getmembers():
                if member.name == name:
                    # entry for the top-level directory itself; the build
                    # directory already exists, so there is nothing to do
                    continue
                if not member.name.startswith(base):
                    raise ValueError(member.name)
                relname = member.name[len(base):]
                # the tarball was downloaded: refuse paths that would be
                # written outside of the build directory
                if os.path.isabs(relname) or ".." in relname.split("/"):
                    raise ValueError(member.name)
                member.name = relname
                members.append(member)
            log.info("Unpacking files to {}".format(self.build_dir))
            tf.extractall(self.build_dir, members)

    def _build_openssl(self):
        """Now build openssl"""
        # the build runs in the build directory, not the install directory
        log.info("Running build in {}".format(self.build_dir))
        cwd = self.build_dir
        cmd = ["./config", "shared", "--prefix={}".format(self.install_dir)]
        cmd.extend(self.openssl_compile_args)
        self._subprocess_call(cmd, cwd=cwd)
        self._subprocess_call(["make"], cwd=cwd)

    def _install_openssl(self, remove=True):
        """Run ``make install``; delete the build directory if *remove*."""
        self._subprocess_call(["make", "install"], cwd=self.build_dir)
        if remove:
            shutil.rmtree(self.build_dir)

    def install_openssl(self):
        """Download, build and install OpenSSL unless already installed.

        Raises ValueError if ``openssl version`` of the installation does
        not mention the requested version.
        """
        if not self.has_openssl:
            if not self.has_src:
                self._download_openssl()
            else:
                log.debug("Already has src {}".format(self.src_file))
            self._unpack_openssl()
            self._build_openssl()
            self._install_openssl()
        else:
            log.info("Already has installation {}".format(self.install_dir))
        # validate installation
        version = self.openssl_version
        if self.version not in version:
            raise ValueError(version)

    def touch_pymods(self):
        """Update the mtime of every file in ``module_files``."""
        # force a rebuild of all modules that use OpenSSL APIs
        for fname in self.module_files:
            os.utime(fname)

    def recompile_pymods(self):
        """Rebuild the extension modules against this installation."""
        # headers and libraries come from the install directory; the build
        # directory is normally gone by now
        log.info("Using OpenSSL build from {}".format(self.install_dir))
        # overwrite header and library search paths
        env = os.environ.copy()
        env["CPPFLAGS"] = "-I{}".format(self.include_dir)
        env["LDFLAGS"] = "-L{}".format(self.lib_dir)
        # set rpath
        env["LD_RUN_PATH"] = self.lib_dir

        log.info("Rebuilding Python modules")
        self.touch_pymods()
        cmd = ["./python", "setup.py", "build"]
        self._subprocess_call(cmd, env=env)

    def check_pyssl(self):
        """Raise ValueError if ssl.OPENSSL_VERSION lacks our version."""
        version = self.pyssl_version
        if self.version not in version:
            raise ValueError(version)

    def run_pytests(self, *args):
        """Run ``./python -m test`` with *args*, showing its output."""
        cmd = ["./python", "-m", "test"]
        cmd.extend(args)
        self._subprocess_call(cmd, stdout=None)

    def run_python_tests(self, *args):
        """Rebuild the modules, verify the linked version, run the tests."""
        self.recompile_pymods()
        self.check_pyssl()
        self.run_pytests(*args)


def main(*args):
    """Install every configured OpenSSL version, then test against each.

    *args* are passed on to ``./python -m test``.
    """
    builders = []
    for version in OPENSSL_VERSIONS:
        if version in ("0.9.8i", "0.9.8l"):
            openssl_compile_args = ("no-asm",)
        else:
            openssl_compile_args = ()
        builder = BuildSSL(version, openssl_compile_args)
        builder.install_openssl()
        builders.append(builder)

    for builder in builders:
        builder.run_python_tests(*args)
    # final touch; guarded so an empty version list is not a NameError
    if builders:
        builders[-1].touch_pymods()


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO,
                        format="*** %(levelname)s %(message)s")
    args = sys.argv[1:]
    if not args:
        args = ["-unetwork", "-v"]
        args.extend(FULL_TESTS)
    main(*args)

# ---------------------------------------------------------------------------
# Patch text that shares the last flattened line with the script above,
# kept verbatim:
#
# diff --git a/Tools/stringbench/stringbench.py b/Tools/stringbench/stringbench.py
# index 142b3ca..5abc25a 100644
# --- a/Tools/stringbench/stringbench.py
# +++ b/Tools/stringbench/stringbench.py
# @@ -808,7 +808,7 @@ standard libraries, and can be learned in a few days. Many Python
#  programmers report substantial productivity gains and feel the language
#  encourages the development of higher quality, more maintainable code.
#
# -Python runs on Windows, Linux/Unix, Mac OS X, OS/2, Amiga, Palm
# +Python runs on Windows, Linux/Unix, Mac OS X, Amiga, Palm
#  Handhelds, and Nokia mobile phones. Python has also been ported to the
#  Java and .NET virtual machines.
# ---------------------------------------------------------------------------
diff --git a/Tools/unicode/gencodec.py b/Tools/unicode/gencodec.py index f5a1af3..f4c7c03 100644 --- a/Tools/unicode/gencodec.py +++ b/Tools/unicode/gencodec.py @@ -290,27 +290,27 @@ import codecs class Codec(codecs.Codec): - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_%s) + def encode(self, input, errors='strict'): + return codecs.charmap_encode(input, errors, encoding_%s) - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_%s) + def decode(self, input, errors='strict'): + return codecs.charmap_decode(input, errors, decoding_%s) ''' % (encodingname, name, suffix, suffix)] l.append('''\ class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_%s)[0] + return codecs.charmap_encode(input, self.errors, encoding_%s)[0] class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_%s)[0]''' % + return codecs.charmap_decode(input, self.errors, decoding_%s)[0]''' % (suffix, suffix)) l.append(''' -class StreamWriter(Codec,codecs.StreamWriter): +class StreamWriter(Codec, codecs.StreamWriter): pass -class StreamReader(Codec,codecs.StreamReader): +class StreamReader(Codec, codecs.StreamReader): pass ### encodings module API @@ -343,7 +343,7 @@ def getregentry(): if decoding_table_code: l.append(''' ### Encoding table -encoding_table=codecs.charmap_build(decoding_table) +encoding_table = codecs.charmap_build(decoding_table) ''') else: l.append(''' diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py index bc3d0cb..0942508 100644 --- a/Tools/unicode/makeunicodedata.py +++ b/Tools/unicode/makeunicodedata.py @@ -37,7 +37,7 @@ SCRIPT = sys.argv[0] VERSION = "3.2" # The Unicode Database -UNIDATA_VERSION = "6.1.0" +UNIDATA_VERSION = "6.3.0" UNICODE_DATA = "UnicodeData%s.txt" 
COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt" EASTASIAN_WIDTH = "EastAsianWidth%s.txt" @@ -68,7 +68,7 @@ CATEGORY_NAMES = [ "Cn", "Lu", "Ll", "Lt", "Mn", "Mc", "Me", "Nd", BIDIRECTIONAL_NAMES = [ "", "L", "LRE", "LRO", "R", "AL", "RLE", "RLO", "PDF", "EN", "ES", "ET", "AN", "CS", "NSM", "BN", "B", "S", "WS", - "ON" ] + "ON", "LRI", "RLI", "FSI", "PDI" ] EASTASIANWIDTH_NAMES = [ "F", "H", "W", "Na", "A", "N" ] @@ -552,7 +552,7 @@ def makeunicodetype(unicode, trace): print("/* Returns 1 for Unicode characters having the bidirectional", file=fp) print(" * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise.", file=fp) print(" */", file=fp) - print('int _PyUnicode_IsWhitespace(register const Py_UCS4 ch)', file=fp) + print('int _PyUnicode_IsWhitespace(const Py_UCS4 ch)', file=fp) print('{', file=fp) print(' switch (ch) {', file=fp) @@ -570,7 +570,7 @@ def makeunicodetype(unicode, trace): print(" * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional", file=fp) print(" * type 'B', 0 otherwise.", file=fp) print(" */", file=fp) - print('int _PyUnicode_IsLinebreak(register const Py_UCS4 ch)', file=fp) + print('int _PyUnicode_IsLinebreak(const Py_UCS4 ch)', file=fp) print('{', file=fp) print(' switch (ch) {', file=fp) for codepoint in sorted(linebreaks): diff --git a/Tools/unicode/python-mappings/CP273.TXT b/Tools/unicode/python-mappings/CP273.TXT new file mode 100644 index 0000000..7bb5e18 --- /dev/null +++ b/Tools/unicode/python-mappings/CP273.TXT @@ -0,0 +1,258 @@ +0x00 0x0000 #NULL (NUL) +0x01 0x0001 #START OF HEADING (SOH) +0x02 0x0002 #START OF TEXT (STX) +0x03 0x0003 #END OF TEXT (ETX) +0x04 0x009C #STRING TERMINATOR (ST) +0x05 0x0009 #CHARACTER TABULATION (HT) +0x06 0x0086 #START OF SELECTED AREA (SSA) +0x07 0x007F #DELETE (DEL) +0x08 0x0097 #END OF GUARDED AREA (EPA) +0x09 0x008D #REVERSE LINE FEED (RI) +0x0A 0x008E #SINGLE-SHIFT TWO (SS2) +0x0B 0x000B #LINE TABULATION (VT) +0x0C 0x000C #FORM FEED (FF) +0x0D 0x000D #CARRIAGE RETURN (CR) +0x0E 
0x000E #SHIFT OUT (SO) +0x0F 0x000F #SHIFT IN (SI) +0x10 0x0010 #DATALINK ESCAPE (DLE) +0x11 0x0011 #DEVICE CONTROL ONE (DC1) +0x12 0x0012 #DEVICE CONTROL TWO (DC2) +0x13 0x0013 #DEVICE CONTROL THREE (DC3) +0x14 0x009D #OPERATING SYSTEM COMMAND (OSC) +0x15 0x0085 #NEXT LINE (NEL) +0x16 0x0008 #BACKSPACE (BS) +0x17 0x0087 #END OF SELECTED AREA (ESA) +0x18 0x0018 #CANCEL (CAN) +0x19 0x0019 #END OF MEDIUM (EM) +0x1A 0x0092 #PRIVATE USE TWO (PU2) +0x1B 0x008F #SINGLE-SHIFT THREE (SS3) +0x1C 0x001C #FILE SEPARATOR (IS4) +0x1D 0x001D #GROUP SEPARATOR (IS3) +0x1E 0x001E #RECORD SEPARATOR (IS2) +0x1F 0x001F #UNIT SEPARATOR (IS1) +0x20 0x0080 #PADDING CHARACTER (PAD) +0x21 0x0081 #HIGH OCTET PRESET (HOP) +0x22 0x0082 #BREAK PERMITTED HERE (BPH) +0x23 0x0083 #NO BREAK HERE (NBH) +0x24 0x0084 #INDEX (IND) +0x25 0x000A #LINE FEED (LF) +0x26 0x0017 #END OF TRANSMISSION BLOCK (ETB) +0x27 0x001B #ESCAPE (ESC) +0x28 0x0088 #CHARACTER TABULATION SET (HTS) +0x29 0x0089 #CHARACTER TABULATION WITH JUSTIFICATION (HTJ) +0x2A 0x008A #LINE TABULATION SET (VTS) +0x2B 0x008B #PARTIAL LINE FORWARD (PLD) +0x2C 0x008C #PARTIAL LINE BACKWARD (PLU) +0x2D 0x0005 #ENQUIRY (ENQ) +0x2E 0x0006 #ACKNOWLEDGE (ACK) +0x2F 0x0007 #BELL (BEL) +0x30 0x0090 #DEVICE CONTROL STRING (DCS) +0x31 0x0091 #PRIVATE USE ONE (PU1) +0x32 0x0016 #SYNCHRONOUS IDLE (SYN) +0x33 0x0093 #SET TRANSMIT STATE (STS) +0x34 0x0094 #CANCEL CHARACTER (CCH) +0x35 0x0095 #MESSAGE WAITING (MW) +0x36 0x0096 #START OF GUARDED AREA (SPA) +0x37 0x0004 #END OF TRANSMISSION (EOT) +0x38 0x0098 #START OF STRING (SOS) +0x39 0x0099 #SINGLE GRAPHIC CHARACTER INTRODUCER (SGCI) +0x3A 0x009A #SINGLE CHARACTER INTRODUCER (SCI) +0x3B 0x009B #CONTROL SEQUENCE INTRODUCER (CSI) +0x3C 0x0014 #DEVICE CONTROL FOUR (DC4) +0x3D 0x0015 #NEGATIVE ACKNOWLEDGE (NAK) +0x3E 0x009E #PRIVACY MESSAGE (PM) +0x3F 0x001A #SUBSTITUTE (SUB) +0x40 0x0020 #SPACE +0x41 0x00A0 #NO-BREAK SPACE +0x42 0x00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX +0x43 0x007B #LEFT CURLY BRACKET 
+0x44 0x00E0 #LATIN SMALL LETTER A WITH GRAVE +0x45 0x00E1 #LATIN SMALL LETTER A WITH ACUTE +0x46 0x00E3 #LATIN SMALL LETTER A WITH TILDE +0x47 0x00E5 #LATIN SMALL LETTER A WITH RING ABOVE +0x48 0x00E7 #LATIN SMALL LETTER C WITH CEDILLA +0x49 0x00F1 #LATIN SMALL LETTER N WITH TILDE +0x4A 0x00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS +0x4B 0x002E #FULL STOP +0x4C 0x003C #LESS-THAN SIGN +0x4D 0x0028 #LEFT PARENTHESIS +0x4E 0x002B #PLUS SIGN +0x4F 0x0021 #EXCLAMATION MARK +0x50 0x0026 #AMPERSAND +0x51 0x00E9 #LATIN SMALL LETTER E WITH ACUTE +0x52 0x00EA #LATIN SMALL LETTER E WITH CIRCUMFLEX +0x53 0x00EB #LATIN SMALL LETTER E WITH DIAERESIS +0x54 0x00E8 #LATIN SMALL LETTER E WITH GRAVE +0x55 0x00ED #LATIN SMALL LETTER I WITH ACUTE +0x56 0x00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX +0x57 0x00EF #LATIN SMALL LETTER I WITH DIAERESIS +0x58 0x00EC #LATIN SMALL LETTER I WITH GRAVE +0x59 0x007E #TILDE +0x5A 0x00DC #LATIN CAPITAL LETTER U WITH DIAERESIS +0x5B 0x0024 #DOLLAR SIGN +0x5C 0x002A #ASTERISK +0x5D 0x0029 #RIGHT PARENTHESIS +0x5E 0x003B #SEMICOLON +0x5F 0x005E #CIRCUMFLEX ACCENT +0x60 0x002D #HYPHEN-MINUS +0x61 0x002F #SOLIDUS +0x62 0x00C2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0x63 0x005B #LEFT SQUARE BRACKET +0x64 0x00C0 #LATIN CAPITAL LETTER A WITH GRAVE +0x65 0x00C1 #LATIN CAPITAL LETTER A WITH ACUTE +0x66 0x00C3 #LATIN CAPITAL LETTER A WITH TILDE +0x67 0x00C5 #LATIN CAPITAL LETTER A WITH RING ABOVE +0x68 0x00C7 #LATIN CAPITAL LETTER C WITH CEDILLA +0x69 0x00D1 #LATIN CAPITAL LETTER N WITH TILDE +0x6A 0x00F6 #LATIN SMALL LETTER O WITH DIAERESIS +0x6B 0x002C #COMMA +0x6C 0x0025 #PERCENT SIGN +0x6D 0x005F #LOW LINE +0x6E 0x003E #GREATER-THAN SIGN +0x6F 0x003F #QUESTION MARK +0x70 0x00F8 #LATIN SMALL LETTER O WITH STROKE +0x71 0x00C9 #LATIN CAPITAL LETTER E WITH ACUTE +0x72 0x00CA #LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0x73 0x00CB #LATIN CAPITAL LETTER E WITH DIAERESIS +0x74 0x00C8 #LATIN CAPITAL LETTER E WITH GRAVE +0x75 0x00CD #LATIN CAPITAL LETTER I WITH ACUTE 
+0x76 0x00CE #LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0x77 0x00CF #LATIN CAPITAL LETTER I WITH DIAERESIS +0x78 0x00CC #LATIN CAPITAL LETTER I WITH GRAVE +0x79 0x0060 #GRAVE ACCENT +0x7A 0x003A #COLON +0x7B 0x0023 #NUMBER SIGN +0x7C 0x00A7 #SECTION SIGN +0x7D 0x0027 #APOSTROPHE +0x7E 0x003D #EQUALS SIGN +0x7F 0x0022 #QUOTATION MARK +0x80 0x00D8 #LATIN CAPITAL LETTER O WITH STROKE +0x81 0x0061 #LATIN SMALL LETTER A +0x82 0x0062 #LATIN SMALL LETTER B +0x83 0x0063 #LATIN SMALL LETTER C +0x84 0x0064 #LATIN SMALL LETTER D +0x85 0x0065 #LATIN SMALL LETTER E +0x86 0x0066 #LATIN SMALL LETTER F +0x87 0x0067 #LATIN SMALL LETTER G +0x88 0x0068 #LATIN SMALL LETTER H +0x89 0x0069 #LATIN SMALL LETTER I +0x8A 0x00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0x8B 0x00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0x8C 0x00F0 #LATIN SMALL LETTER ETH (Icelandic) +0x8D 0x00FD #LATIN SMALL LETTER Y WITH ACUTE +0x8E 0x00FE #LATIN SMALL LETTER THORN (Icelandic) +0x8F 0x00B1 #PLUS-MINUS SIGN +0x90 0x00B0 #DEGREE SIGN +0x91 0x006A #LATIN SMALL LETTER J +0x92 0x006B #LATIN SMALL LETTER K +0x93 0x006C #LATIN SMALL LETTER L +0x94 0x006D #LATIN SMALL LETTER M +0x95 0x006E #LATIN SMALL LETTER N +0x96 0x006F #LATIN SMALL LETTER O +0x97 0x0070 #LATIN SMALL LETTER P +0x98 0x0071 #LATIN SMALL LETTER Q +0x99 0x0072 #LATIN SMALL LETTER R +0x9A 0x00AA #FEMININE ORDINAL INDICATOR +0x9B 0x00BA #MASCULINE ORDINAL INDICATOR +0x9C 0x00E6 #LATIN SMALL LETTER AE +0x9D 0x00B8 #CEDILLA +0x9E 0x00C6 #LATIN CAPITAL LETTER AE +0x9F 0x00A4 #CURRENCY SIGN +0xA0 0x00B5 #MICRO SIGN +0xA1 0x00DF #LATIN SMALL LETTER SHARP S (German) +0xA2 0x0073 #LATIN SMALL LETTER S +0xA3 0x0074 #LATIN SMALL LETTER T +0xA4 0x0075 #LATIN SMALL LETTER U +0xA5 0x0076 #LATIN SMALL LETTER V +0xA6 0x0077 #LATIN SMALL LETTER W +0xA7 0x0078 #LATIN SMALL LETTER X +0xA8 0x0079 #LATIN SMALL LETTER Y +0xA9 0x007A #LATIN SMALL LETTER Z +0xAA 0x00A1 #INVERTED EXCLAMATION MARK +0xAB 0x00BF #INVERTED QUESTION MARK +0xAC 0x00D0 #LATIN CAPITAL 
LETTER ETH (Icelandic) +0xAD 0x00DD #LATIN CAPITAL LETTER Y WITH ACUTE +0xAE 0x00DE #LATIN CAPITAL LETTER THORN (Icelandic) +0xAF 0x00AE #REGISTERED SIGN +0xB0 0x00A2 #CENT SIGN +0xB1 0x00A3 #POUND SIGN +0xB2 0x00A5 #YEN SIGN +0xB3 0x00B7 #MIDDLE DOT +0xB4 0x00A9 #COPYRIGHT SIGN +0xB5 0x0040 #COMMERCIAL AT +0xB6 0x00B6 #PILCROW SIGN +0xB7 0x00BC #VULGAR FRACTION ONE QUARTER +0xB8 0x00BD #VULGAR FRACTION ONE HALF +0xB9 0x00BE #VULGAR FRACTION THREE QUARTERS +0xBA 0x00AC #NOT SIGN +0xBB 0x007C #VERTICAL LINE +0xBC 0x203E #OVERLINE +0xBD 0x00A8 #DIAERESIS +0xBE 0x00B4 #ACUTE ACCENT +0xBF 0x00D7 #MULTIPLICATION SIGN +0xC0 0x00E4 #LATIN SMALL LETTER A WITH DIAERESIS +0xC1 0x0041 #LATIN CAPITAL LETTER A +0xC2 0x0042 #LATIN CAPITAL LETTER B +0xC3 0x0043 #LATIN CAPITAL LETTER C +0xC4 0x0044 #LATIN CAPITAL LETTER D +0xC5 0x0045 #LATIN CAPITAL LETTER E +0xC6 0x0046 #LATIN CAPITAL LETTER F +0xC7 0x0047 #LATIN CAPITAL LETTER G +0xC8 0x0048 #LATIN CAPITAL LETTER H +0xC9 0x0049 #LATIN CAPITAL LETTER I +0xCA 0x00AD #SOFT HYPHEN +0xCB 0x00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX +0xCC 0x00A6 #BROKEN BAR +0xCD 0x00F2 #LATIN SMALL LETTER O WITH GRAVE +0xCE 0x00F3 #LATIN SMALL LETTER O WITH ACUTE +0xCF 0x00F5 #LATIN SMALL LETTER O WITH TILDE +0xD0 0x00FC #LATIN SMALL LETTER U WITH DIAERESIS +0xD1 0x004A #LATIN CAPITAL LETTER J +0xD2 0x004B #LATIN CAPITAL LETTER K +0xD3 0x004C #LATIN CAPITAL LETTER L +0xD4 0x004D #LATIN CAPITAL LETTER M +0xD5 0x004E #LATIN CAPITAL LETTER N +0xD6 0x004F #LATIN CAPITAL LETTER O +0xD7 0x0050 #LATIN CAPITAL LETTER P +0xD8 0x0051 #LATIN CAPITAL LETTER Q +0xD9 0x0052 #LATIN CAPITAL LETTER R +0xDA 0x00B9 #SUPERSCRIPT ONE +0xDB 0x00FB #LATIN SMALL LETTER U WITH CIRCUMFLEX +0xDC 0x007D #RIGHT CURLY BRACKET +0xDD 0x00F9 #LATIN SMALL LETTER U WITH GRAVE +0xDE 0x00FA #LATIN SMALL LETTER U WITH ACUTE +0xDF 0x00FF #LATIN SMALL LETTER Y WITH DIAERESIS +0xE0 0x00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS +0xE1 0x00F7 #DIVISION SIGN +0xE2 0x0053 #LATIN CAPITAL LETTER S 
+0xE3 0x0054 #LATIN CAPITAL LETTER T +0xE4 0x0055 #LATIN CAPITAL LETTER U +0xE5 0x0056 #LATIN CAPITAL LETTER V +0xE6 0x0057 #LATIN CAPITAL LETTER W +0xE7 0x0058 #LATIN CAPITAL LETTER X +0xE8 0x0059 #LATIN CAPITAL LETTER Y +0xE9 0x005A #LATIN CAPITAL LETTER Z +0xEA 0x00B2 #SUPERSCRIPT TWO +0xEB 0x00D4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xEC 0x005C #REVERSE SOLIDUS +0xED 0x00D2 #LATIN CAPITAL LETTER O WITH GRAVE +0xEE 0x00D3 #LATIN CAPITAL LETTER O WITH ACUTE +0xEF 0x00D5 #LATIN CAPITAL LETTER O WITH TILDE +0xF0 0x0030 #DIGIT ZERO +0xF1 0x0031 #DIGIT ONE +0xF2 0x0032 #DIGIT TWO +0xF3 0x0033 #DIGIT THREE +0xF4 0x0034 #DIGIT FOUR +0xF5 0x0035 #DIGIT FIVE +0xF6 0x0036 #DIGIT SIX +0xF7 0x0037 #DIGIT SEVEN +0xF8 0x0038 #DIGIT EIGHT +0xF9 0x0039 #DIGIT NINE +0xFA 0x00B3 #SUPERSCRIPT THREE +0xFB 0x00DB #LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xFC 0x005D #RIGHT SQUARE BRACKET +0xFD 0x00D9 #LATIN CAPITAL LETTER U WITH GRAVE +0xFE 0x00DA #LATIN CAPITAL LETTER U WITH ACUTE +0xFF 0x009F #APPLICATION PROGRAM COMMAND (APC) + + |