diff options
author | Larry Hastings <larry@hastings.org> | 2014-01-28 13:00:08 (GMT) |
---|---|---|
committer | Larry Hastings <larry@hastings.org> | 2014-01-28 13:00:08 (GMT) |
commit | 581ee3618c756132359d98b6fc149ec7e7ca9ef9 (patch) | |
tree | fa211357051306722af3b064f5b095992105524c /Tools/clinic | |
parent | eecbbad89b60c20641fa8dd1c12f52b3648408ea (diff) | |
download | cpython-581ee3618c756132359d98b6fc149ec7e7ca9ef9.zip cpython-581ee3618c756132359d98b6fc149ec7e7ca9ef9.tar.gz cpython-581ee3618c756132359d98b6fc149ec7e7ca9ef9.tar.bz2 |
Issue #20326: Argument Clinic now uses a simple, unique signature to
annotate text signatures in docstrings, resulting in fewer false
positives. "self" parameters are also explicitly marked, allowing
inspect.Signature() to authoritatively detect (and skip) said parameters.
Issue #20326: Argument Clinic now generates separate checksums for the
input and output sections of the block, allowing external tools to verify
that the input has not changed (and thus the output is not out-of-date).
Diffstat (limited to 'Tools/clinic')
-rwxr-xr-x | Tools/clinic/clinic.py | 193 |
1 files changed, 150 insertions, 43 deletions
diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py index f3fe3c1..a68551f 100755 --- a/Tools/clinic/clinic.py +++ b/Tools/clinic/clinic.py @@ -19,6 +19,7 @@ import os import pprint import re import shlex +import string import sys import tempfile import textwrap @@ -98,7 +99,7 @@ def warn_or_fail(fail=False, *args, filename=None, line_number=None): if clinic: if filename is None: filename = clinic.filename - if clinic.block_parser and (line_number is None): + if getattr(clinic, 'block_parser', None) and (line_number is None): line_number = clinic.block_parser.line_number if filename is not None: add(' in file "' + filename + '"') @@ -335,6 +336,22 @@ class CRenderData: self.cleanup = [] +class FormatCounterFormatter(string.Formatter): + """ + This counts how many instances of each formatter + "replacement string" appear in the format string. + + e.g. after evaluating "string {a}, {b}, {c}, {a}" + the counts dict would now look like + {'a': 2, 'b': 1, 'c': 1} + """ + def __init__(self): + self.counts = collections.Counter() + + def get_value(self, key, args, kwargs): + self.counts[key] += 1 + return '' + class Language(metaclass=abc.ABCMeta): start_line = "" @@ -347,18 +364,59 @@ class Language(metaclass=abc.ABCMeta): pass def validate(self): - def assert_only_one(field, token='dsl_name'): - line = getattr(self, field) - token = '{' + token + '}' - if len(line.split(token)) != 2: - fail(self.__class__.__name__ + " " + field + " must contain " + token + " exactly once!") + def assert_only_one(attr, *additional_fields): + """ + Ensures that the string found at getattr(self, attr) + contains exactly one formatter replacement string for + each valid field. The list of valid fields is + ['dsl_name'] extended by additional_fields. + + e.g. + self.fmt = "{dsl_name} {a} {b}" + + # this passes + self.assert_only_one('fmt', 'a', 'b') + + # this fails, the format string has a {b} in it + self.assert_only_one('fmt', 'a') + + # this fails, the format string doesn't have a {c} in it + self.assert_only_one('fmt', 'a', 'b', 'c') + + # this fails, the format string has two {a}s in it, + # it must contain exactly one + self.fmt2 = '{dsl_name} {a} {a}' + self.assert_only_one('fmt2', 'a') + + """ + fields = ['dsl_name'] + fields.extend(additional_fields) + line = getattr(self, attr) + fcf = FormatCounterFormatter() + fcf.format(line) + def local_fail(should_be_there_but_isnt): + if should_be_there_but_isnt: + fail("{} {} must contain {{{}}} exactly once!".format( + self.__class__.__name__, attr, name)) + else: + fail("{} {} must not contain {{{}}}!".format( + self.__class__.__name__, attr, name)) + + for name, count in fcf.counts.items(): + if name in fields: + if count > 1: + local_fail(True) + else: + local_fail(False) + for name in fields: + if fcf.counts.get(name) != 1: + local_fail(True) + assert_only_one('start_line') assert_only_one('stop_line') - assert_only_one('checksum_line') - assert_only_one('checksum_line', 'checksum') - if len(self.body_prefix.split('{dsl_name}')) >= 3: - fail(self.__class__.__name__ + " body_prefix may contain " + token + " once at most!") + field = "arguments" if "{arguments}" in self.checksum_line else "checksum" + assert_only_one('checksum_line', field) @@ -368,7 +426,7 @@ class PythonLanguage(Language): start_line = "#/*[{dsl_name} input]" body_prefix = "#" stop_line = "#[{dsl_name} start generated code]*/" - checksum_line = "#/*[{dsl_name} end generated code: checksum={checksum}]*/" + checksum_line = "#/*[{dsl_name} end generated code: {arguments}]*/" def permute_left_option_groups(l): @@ -438,7 +496,7 @@ class CLanguage(Language): start_line = "/*[{dsl_name} input]" body_prefix = "" stop_line = "[{dsl_name} start generated code]*/" - checksum_line = "/*[{dsl_name} end generated code: checksum={checksum}]*/" + checksum_line = "/*[{dsl_name} end generated code: {arguments}]*/" def render(self, clinic, signatures): function = None @@ -1103,10 +1161,12 @@ def OverrideStdioWith(stdout): sys.stdout = saved_stdout -def create_regex(before, after): +def create_regex(before, after, word=True): """Create an re object for matching marker lines.""" - pattern = r'^{}(\w+){}$' - return re.compile(pattern.format(re.escape(before), re.escape(after))) + group_re = "\w+" if word else ".+" + pattern = r'^{}({}){}$' + pattern = pattern.format(re.escape(before), group_re, re.escape(after)) + return re.compile(pattern) class Block: @@ -1164,6 +1224,16 @@ class Block: self.indent = indent self.preindent = preindent + def __repr__(self): + dsl_name = self.dsl_name or "text" + def summarize(s): + s = repr(s) + if len(s) > 30: + return s[:26] + "..." + s[0] + return s + return "".join(( + "<Block ", dsl_name, " input=", summarize(self.input), " output=", summarize(self.output), ">")) + class BlockParser: """ @@ -1264,29 +1334,43 @@ class BlockParser: if self.last_dsl_name == dsl_name: checksum_re = self.last_checksum_re else: - before, _, after = self.language.checksum_line.format(dsl_name=dsl_name, checksum='{checksum}').partition('{checksum}') - assert _ == '{checksum}' - checksum_re = create_regex(before, after) + before, _, after = self.language.checksum_line.format(dsl_name=dsl_name, arguments='{arguments}').partition('{arguments}') + assert _ == '{arguments}' + checksum_re = create_regex(before, after, word=False) self.last_dsl_name = dsl_name self.last_checksum_re = checksum_re # scan forward for checksum line output_add, output_output = text_accumulator() - checksum = None + arguments = None while self.input: line = self._line() match = checksum_re.match(line.lstrip()) - checksum = match.group(1) if match else None - if checksum: + arguments = match.group(1) if match else None + if arguments: break output_add(line) if self.is_start_line(line): break output = output_output() - if checksum: + if arguments: + d = {} + for field in shlex.split(arguments): + name, equals, value = field.partition('=') + if not equals: + fail("Mangled Argument Clinic marker line: {!r}".format(line)) + d[name.strip()] = value.strip() + if self.verify: - computed = compute_checksum(output) + if 'input' in d: + checksum = d['output'] + input_checksum = d['input'] + else: + checksum = d['checksum'] + input_checksum = None + + computed = compute_checksum(output, len(checksum)) if checksum != computed: fail("Checksum mismatch!\nExpected: {}\nComputed: {}\n" "Suggested fix: remove all generated code including " @@ -1336,13 +1420,15 @@ class BlockPrinter: write(self.language.stop_line.format(dsl_name=dsl_name)) write("\n") + input = ''.join(block.input) output = ''.join(block.output) if output: if not output.endswith('\n'): output += '\n' write(output) - write(self.language.checksum_line.format(dsl_name=dsl_name, checksum=compute_checksum(output))) + arguments="output={} input={}".format(compute_checksum(output, 16), compute_checksum(input, 16)) + write(self.language.checksum_line.format(dsl_name=dsl_name, arguments=arguments)) write("\n") def write(self, text): @@ -1468,7 +1554,7 @@ impl_definition block """ - def __init__(self, language, printer=None, *, verify=True, filename=None): + def __init__(self, language, printer=None, *, force=False, verify=True, filename=None): # maps strings to Parser objects. # (instantiated from the "parsers" global.) self.parsers = {} @@ -1477,6 +1563,7 @@ impl_definition block fail("Custom printers are broken right now") self.printer = printer or BlockPrinter(language) self.verify = verify + self.force = force self.filename = filename self.modules = collections.OrderedDict() self.classes = collections.OrderedDict() @@ -1594,11 +1681,12 @@ impl_definition block fail("Can't write to destination {}, " "can't make directory {}!".format( destination.filename, dirname)) - with open(destination.filename, "rt") as f: - parser_2 = BlockParser(f.read(), language=self.language) - blocks = list(parser_2) - if (len(blocks) != 1) or (blocks[0].input != 'preserve\n'): - fail("Modified destination file " + repr(destination.filename) + ", not overwriting!") + if self.verify: + with open(destination.filename, "rt") as f: + parser_2 = BlockParser(f.read(), language=self.language) + blocks = list(parser_2) + if (len(blocks) != 1) or (blocks[0].input != 'preserve\n'): + fail("Modified destination file " + repr(destination.filename) + ", not overwriting!") except FileNotFoundError: pass @@ -1658,7 +1746,7 @@ impl_definition block return module, cls -def parse_file(filename, *, verify=True, output=None, encoding='utf-8'): +def parse_file(filename, *, force=False, verify=True, output=None, encoding='utf-8'): extension = os.path.splitext(filename)[1][1:] if not extension: fail("Can't extract file type for file " + repr(filename)) @@ -1668,13 +1756,13 @@ def parse_file(filename, *, verify=True, output=None, encoding='utf-8'): except KeyError: fail("Can't identify file type for file " + repr(filename)) - clinic = Clinic(language, verify=verify, filename=filename) + clinic = Clinic(language, force=force, verify=verify, filename=filename) with open(filename, 'r', encoding=encoding) as f: raw = f.read() cooked = clinic.parse(raw) - if cooked == raw: + if (cooked == raw) and not force: return directory = os.path.dirname(filename) or '.' @@ -1687,9 +1775,12 @@ def parse_file(filename, *, verify=True, output=None, encoding='utf-8'): os.replace(tmpfilename, output or filename) -def compute_checksum(input): +def compute_checksum(input, length=None): input = input or '' - return hashlib.sha1(input.encode('utf-8')).hexdigest() + s = hashlib.sha1(input.encode('utf-8')).hexdigest() + if length: + s = s[:length] + return s @@ -1826,7 +1917,8 @@ class Function: module, cls=None, c_basename=None, full_name=None, return_converter, return_annotation=_empty, - docstring=None, kind=CALLABLE, coexist=False): + docstring=None, kind=CALLABLE, coexist=False, + suppress_signature=False): self.parameters = parameters or collections.OrderedDict() self.return_annotation = return_annotation self.name = name @@ -1840,6 +1932,7 @@ class Function: self.kind = kind self.coexist = coexist self.self_converter = None + self.suppress_signature = suppress_signature @property def methoddef_flags(self): @@ -3520,6 +3613,7 @@ class DSLParser: else: fail("Function " + self.function.name + " has an unsupported group configuration. (Unexpected state " + str(self.parameter_state) + ".b)") self.group += 1 + self.function.suppress_signature = True elif symbol == ']': if not self.group: fail("Function " + self.function.name + " has a ] without a matching [.") @@ -3615,11 +3709,14 @@ class DSLParser: ## docstring first line ## - if new_or_init: - assert f.cls - add(f.cls.name) + if not f.suppress_signature: + add('sig=') else: - add(f.name) + if new_or_init: + assert f.cls + add(f.cls.name) + else: + add(f.name) add('(') # populate "right_bracket_count" field for every parameter @@ -3673,7 +3770,17 @@ class DSLParser: add_comma = True name = p.converter.signature_name or p.name - a = [name] + + a = [] + if isinstance(p.converter, self_converter) and not f.suppress_signature: + # annotate first parameter as being a "self". + # + # if inspect.Signature gets this function, and it's already bound, + # the self parameter will be stripped off. + # + # if it's not bound, it should be marked as positional-only. + a.append('$') + a.append(name) if p.converter.is_optional(): a.append('=') value = p.converter.py_default @@ -3915,7 +4022,7 @@ def main(argv): path = os.path.join(root, filename) if ns.verbose: print(path) - parse_file(path, verify=not ns.force) + parse_file(path, force=ns.force, verify=not ns.force) return if not ns.filename: @@ -3931,7 +4038,7 @@ def main(argv): for filename in ns.filename: if ns.verbose: print(filename) - parse_file(filename, output=ns.output, verify=not ns.force) + parse_file(filename, output=ns.output, force=ns.force, verify=not ns.force) if __name__ == "__main__": |