diff options
author | Raymond Hettinger <python@rcn.com> | 2011-02-21 19:42:11 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2011-02-21 19:42:11 (GMT) |
commit | ddb52404eef6e107abbacd5d78c0f96a7e289b32 (patch) | |
tree | eae4ce0650917c3d124e46d06e8c654b01cd1421 | |
parent | 31f5929c1e28adcaa1fdb302da366f3c7a92a98a (diff) | |
download | cpython-ddb52404eef6e107abbacd5d78c0f96a7e289b32.zip cpython-ddb52404eef6e107abbacd5d78c0f96a7e289b32.tar.gz cpython-ddb52404eef6e107abbacd5d78c0f96a7e289b32.tar.bz2 |
Issue #11089: Fix performance issue limiting the use of ConfigParser()
with large config files.
-rw-r--r-- | Lib/collections.py | 91 |
-rw-r--r-- | Lib/configparser.py | 17 |
-rw-r--r-- | Misc/NEWS | 3 |
3 files changed, 103 insertions, 8 deletions
diff --git a/Lib/collections.py b/Lib/collections.py index 89a1588..2f19459 100644 --- a/Lib/collections.py +++ b/Lib/collections.py @@ -631,6 +631,97 @@ class Counter(dict): return result +######################################################################## +### ChainMap (helper for configparser) +######################################################################## + +class _ChainMap(MutableMapping): + ''' A ChainMap groups multiple dicts (or other mappings) together + to create a single, updateable view. + + The underlying mappings are stored in a list. That list is public and can + accessed or updated using the *maps* attribute. There is no other state. + + Lookups search the underlying mappings successively until a key is found. + In contrast, writes, updates, and deletions only operate on the first + mapping. + + ''' + + def __init__(self, *maps): + '''Initialize a ChainMap by setting *maps* to the given mappings. + If no mappings are provided, a single empty dictionary is used. + + ''' + self.maps = list(maps) or [{}] # always at least one map + + def __missing__(self, key): + raise KeyError(key) + + def __getitem__(self, key): + for mapping in self.maps: + try: + return mapping[key] # can't use 'key in mapping' with defaultdict + except KeyError: + pass + return self.__missing__(key) # support subclasses that define __missing__ + + def get(self, key, default=None): + return self[key] if key in self else default + + def __len__(self): + return len(set().union(*self.maps)) # reuses stored hash values if possible + + def __iter__(self): + return iter(set().union(*self.maps)) + + def __contains__(self, key): + return any(key in m for m in self.maps) + + @_recursive_repr() + def __repr__(self): + return '{0.__class__.__name__}({1})'.format( + self, ', '.join(map(repr, self.maps))) + + @classmethod + def fromkeys(cls, iterable, *args): + 'Create a ChainMap with a single dict created from the iterable.' 
+ return cls(dict.fromkeys(iterable, *args)) + + def copy(self): + 'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]' + return self.__class__(self.maps[0].copy(), *self.maps[1:]) + + __copy__ = copy + + def __setitem__(self, key, value): + self.maps[0][key] = value + + def __delitem__(self, key): + try: + del self.maps[0][key] + except KeyError: + raise KeyError('Key not found in the first mapping: {!r}'.format(key)) + + def popitem(self): + 'Remove and return an item pair from maps[0]. Raise KeyError is maps[0] is empty.' + try: + return self.maps[0].popitem() + except KeyError: + raise KeyError('No keys found in the first mapping.') + + def pop(self, key, *args): + 'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].' + try: + return self.maps[0].pop(key, *args) + except KeyError: + raise KeyError('Key not found in the first mapping: {!r}'.format(key)) + + def clear(self): + 'Clear maps[0], leaving maps[1:] intact.' + self.maps[0].clear() + + ################################################################################ ### UserDict ################################################################################ diff --git a/Lib/configparser.py b/Lib/configparser.py index f1866eb..1bfdac8 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -119,7 +119,7 @@ ConfigParser -- responsible for parsing a list of between keys and values are surrounded by spaces. """ -from collections import MutableMapping, OrderedDict as _default_dict +from collections import MutableMapping, OrderedDict as _default_dict, _ChainMap import functools import io import itertools @@ -1099,23 +1099,24 @@ class RawConfigParser(MutableMapping): return exc def _unify_values(self, section, vars): - """Create a copy of the DEFAULTSECT with values from a specific - `section' and the `vars' dictionary. If provided, values in `vars' - take precendence. 
+ """Create a sequence of lookups with 'vars' taking priority over + the 'section' which takes priority over the DEFAULTSECT. + """ - d = self._defaults.copy() + sectiondict = {} try: - d.update(self._sections[section]) + sectiondict = self._sections[section] except KeyError: if section != self.default_section: raise NoSectionError(section) # Update with the entry specific variables + vardict = {} if vars: for key, value in vars.items(): if value is not None: value = str(value) - d[self.optionxform(key)] = value - return d + vardict[self.optionxform(key)] = value + return _ChainMap(vardict, sectiondict, self._defaults) def _convert_to_boolean(self, value): """Return a boolean value translating from other types if necessary. @@ -15,6 +15,9 @@ Core and Builtins Library ------- +- Issue #11089: Fix performance issue limiting the use of ConfigParser() + with large config files. + - Issue #10276: Fix the results of zlib.crc32() and zlib.adler32() on buffers larger than 4GB. Patch by Nadeem Vawda. |