diff options
author | Raymond Hettinger <rhettinger@users.noreply.github.com> | 2017-09-10 17:23:36 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-09-10 17:23:36 (GMT) |
commit | 8b57d7363916869357848e666d03fa7614c47897 (patch) | |
tree | ce2c871ab523dbbf05bd80ee9dcfd1f2534d5798 /Lib/collections | |
parent | 3cedf46cdbeefc019f4a672c1104f3d5e94712bd (diff) | |
download | cpython-8b57d7363916869357848e666d03fa7614c47897.zip cpython-8b57d7363916869357848e666d03fa7614c47897.tar.gz cpython-8b57d7363916869357848e666d03fa7614c47897.tar.bz2 |
bpo-28638: Optimize namedtuple() creation time by minimizing use of exec() (#3454)
* Working draft without _source
* Re-use itemgetter() instances
* Speed-up calls to __new__() with a pre-bound tuple.__new__()
* Add note regarding string interning
* Remove unnecessary create function wrappers
* Minor sync-ups with PR-2736. Mostly formatting and f-strings
* Bring-in qualname/__module fix-ups from PR-2736
* Formally remove the verbose flag and _source attribute
* Restore a test of potentially problematic field names
* Restore kwonly_args test but without the verbose option
* Adopt Inada's idea to reuse the docstrings for the itemgetters
* Neaten-up a bit
* Add news blurb
* Serhiy pointed-out the need for interning
* Jelle noticed as missing f on an f-string
* Add whatsnew entry for feature removal
* Accede to request for dict literals instead keyword arguments
* Leave the method.__module__ attribute pointing the actual location of the code
* Improve variable names and add a micro-optimization for an non-public helper function
* Simplify by in-lining reuse_itemgetter()
* Arrange steps in more logical order
* Save docstring in local cache instead of interning
Diffstat (limited to 'Lib/collections')
-rw-r--r-- | Lib/collections/__init__.py | 162 |
1 files changed, 85 insertions, 77 deletions
diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index 70cb683..50cf814 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -301,59 +301,9 @@ except ImportError: ### namedtuple ################################################################################ -_class_template = """\ -from builtins import property as _property, tuple as _tuple -from operator import itemgetter as _itemgetter -from collections import OrderedDict +_nt_itemgetters = {} -class {typename}(tuple): - '{typename}({arg_list})' - - __slots__ = () - - _fields = {field_names!r} - - def __new__(_cls, {arg_list}): - 'Create new instance of {typename}({arg_list})' - return _tuple.__new__(_cls, ({arg_list})) - - @classmethod - def _make(cls, iterable, new=tuple.__new__, len=len): - 'Make a new {typename} object from a sequence or iterable' - result = new(cls, iterable) - if len(result) != {num_fields:d}: - raise TypeError('Expected {num_fields:d} arguments, got %d' % len(result)) - return result - - def _replace(_self, **kwds): - 'Return a new {typename} object replacing specified fields with new values' - result = _self._make(map(kwds.pop, {field_names!r}, _self)) - if kwds: - raise ValueError('Got unexpected field names: %r' % list(kwds)) - return result - - def __repr__(self): - 'Return a nicely formatted representation string' - return self.__class__.__name__ + '({repr_fmt})' % self - - def _asdict(self): - 'Return a new OrderedDict which maps field names to their values.' - return OrderedDict(zip(self._fields, self)) - - def __getnewargs__(self): - 'Return self as a plain tuple. Used by copy and pickle.' - return tuple(self) - -{field_defs} -""" - -_repr_template = '{name}=%r' - -_field_template = '''\ - {name} = _property(_itemgetter({index:d}), doc='Alias for field number {index:d}') -''' - -def namedtuple(typename, field_names, *, verbose=False, rename=False, module=None): +def namedtuple(typename, field_names, *, rename=False, module=None): """Returns a new subclass of tuple with named fields. >>> Point = namedtuple('Point', ['x', 'y']) @@ -390,46 +340,104 @@ def namedtuple(typename, field_names, *, verbose=False, rename=False, module=Non or _iskeyword(name) or name.startswith('_') or name in seen): - field_names[index] = '_%d' % index + field_names[index] = f'_{index}' seen.add(name) for name in [typename] + field_names: if type(name) is not str: raise TypeError('Type names and field names must be strings') if not name.isidentifier(): raise ValueError('Type names and field names must be valid ' - 'identifiers: %r' % name) + f'identifiers: {name!r}') if _iskeyword(name): raise ValueError('Type names and field names cannot be a ' - 'keyword: %r' % name) + f'keyword: {name!r}') seen = set() for name in field_names: if name.startswith('_') and not rename: raise ValueError('Field names cannot start with an underscore: ' - '%r' % name) + f'{name!r}') if name in seen: - raise ValueError('Encountered duplicate field name: %r' % name) + raise ValueError(f'Encountered duplicate field name: {name!r}') seen.add(name) - # Fill-in the class template - class_definition = _class_template.format( - typename = typename, - field_names = tuple(field_names), - num_fields = len(field_names), - arg_list = repr(tuple(field_names)).replace("'", "")[1:-1], - repr_fmt = ', '.join(_repr_template.format(name=name) - for name in field_names), - field_defs = '\n'.join(_field_template.format(index=index, name=name) - for index, name in enumerate(field_names)) - ) - - # Execute the template string in a temporary namespace and support - # tracing utilities by setting a value for frame.f_globals['__name__'] - namespace = dict(__name__='namedtuple_%s' % typename) - exec(class_definition, namespace) - result = namespace[typename] - result._source = class_definition - if verbose: - print(result._source) + # Variables used in the methods and docstrings + field_names = tuple(map(_sys.intern, field_names)) + num_fields = len(field_names) + arg_list = repr(field_names).replace("'", "")[1:-1] + repr_fmt = '(' + ', '.join(f'{name}=%r' for name in field_names) + ')' + tuple_new = tuple.__new__ + _len = len + + # Create all the named tuple methods to be added to the class namespace + + s = f'def __new__(_cls, {arg_list}): return _tuple_new(_cls, ({arg_list}))' + namespace = {'_tuple_new': tuple_new, '__name__': f'namedtuple_{typename}'} + # Note: exec() has the side-effect of interning the typename and field names + exec(s, namespace) + __new__ = namespace['__new__'] + __new__.__doc__ = f'Create new instance of {typename}({arg_list})' + + @classmethod + def _make(cls, iterable): + result = tuple_new(cls, iterable) + if _len(result) != num_fields: + raise TypeError(f'Expected {num_fields} arguments, got {len(result)}') + return result + + _make.__func__.__doc__ = (f'Make a new {typename} object from a sequence ' + 'or iterable') + + def _replace(_self, **kwds): + result = _self._make(map(kwds.pop, field_names, _self)) + if kwds: + raise ValueError(f'Got unexpected field names: {list(kwds)!r}') + return result + + _replace.__doc__ = (f'Return a new {typename} object replacing specified ' + 'fields with new values') + + def __repr__(self): + 'Return a nicely formatted representation string' + return self.__class__.__name__ + repr_fmt % self + + def _asdict(self): + 'Return a new OrderedDict which maps field names to their values.' + return OrderedDict(zip(self._fields, self)) + + def __getnewargs__(self): + 'Return self as a plain tuple. Used by copy and pickle.' + return tuple(self) + + # Modify function metadata to help with introspection and debugging + + for method in (__new__, _make.__func__, _replace, + __repr__, _asdict, __getnewargs__): + method.__qualname__ = f'{typename}.{method.__name__}' + + # Build-up the class namespace dictionary + # and use type() to build the result class + class_namespace = { + '__doc__': f'{typename}({arg_list})', + '__slots__': (), + '_fields': field_names, + '__new__': __new__, + '_make': _make, + '_replace': _replace, + '__repr__': __repr__, + '_asdict': _asdict, + '__getnewargs__': __getnewargs__, + } + cache = _nt_itemgetters + for index, name in enumerate(field_names): + try: + itemgetter_object, doc = cache[index] + except KeyError: + itemgetter_object = _itemgetter(index) + doc = f'Alias for field number {index}' + cache[index] = itemgetter_object, doc + class_namespace[name] = property(itemgetter_object, doc=doc) + + result = type(typename, (tuple,), class_namespace) # For pickling to work, the __module__ variable needs to be set to the frame # where the named tuple is created. Bypass this step in environments where |