From ddb52404eef6e107abbacd5d78c0f96a7e289b32 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 21 Feb 2011 19:42:11 +0000 Subject: Issue #11089: Fix performance issue limiting the use of ConfigParser() with large config files. --- Lib/collections.py | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++ Lib/configparser.py | 17 +++++----- Misc/NEWS | 3 ++ 3 files changed, 103 insertions(+), 8 deletions(-) diff --git a/Lib/collections.py b/Lib/collections.py index 89a1588..2f19459 100644 --- a/Lib/collections.py +++ b/Lib/collections.py @@ -631,6 +631,97 @@ class Counter(dict): return result +######################################################################## +### ChainMap (helper for configparser) +######################################################################## + +class _ChainMap(MutableMapping): + ''' A ChainMap groups multiple dicts (or other mappings) together + to create a single, updateable view. + + The underlying mappings are stored in a list. That list is public and can + be accessed or updated using the *maps* attribute. There is no other state. + + Lookups search the underlying mappings successively until a key is found. + In contrast, writes, updates, and deletions only operate on the first + mapping. + + ''' + + def __init__(self, *maps): + '''Initialize a ChainMap by setting *maps* to the given mappings. + If no mappings are provided, a single empty dictionary is used. 
+ + ''' + self.maps = list(maps) or [{}] # always at least one map + + def __missing__(self, key): + raise KeyError(key) + + def __getitem__(self, key): + for mapping in self.maps: + try: + return mapping[key] # can't use 'key in mapping' with defaultdict + except KeyError: + pass + return self.__missing__(key) # support subclasses that define __missing__ + + def get(self, key, default=None): + return self[key] if key in self else default + + def __len__(self): + return len(set().union(*self.maps)) # reuses stored hash values if possible + + def __iter__(self): + return iter(set().union(*self.maps)) + + def __contains__(self, key): + return any(key in m for m in self.maps) + + @_recursive_repr() + def __repr__(self): + return '{0.__class__.__name__}({1})'.format( + self, ', '.join(map(repr, self.maps))) + + @classmethod + def fromkeys(cls, iterable, *args): + 'Create a ChainMap with a single dict created from the iterable.' + return cls(dict.fromkeys(iterable, *args)) + + def copy(self): + 'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]' + return self.__class__(self.maps[0].copy(), *self.maps[1:]) + + __copy__ = copy + + def __setitem__(self, key, value): + self.maps[0][key] = value + + def __delitem__(self, key): + try: + del self.maps[0][key] + except KeyError: + raise KeyError('Key not found in the first mapping: {!r}'.format(key)) + + def popitem(self): + 'Remove and return an item pair from maps[0]. Raise KeyError if maps[0] is empty.' + try: + return self.maps[0].popitem() + except KeyError: + raise KeyError('No keys found in the first mapping.') + + def pop(self, key, *args): + 'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].' + try: + return self.maps[0].pop(key, *args) + except KeyError: + raise KeyError('Key not found in the first mapping: {!r}'.format(key)) + + def clear(self): + 'Clear maps[0], leaving maps[1:] intact.' 
+ self.maps[0].clear() + + ################################################################################ ### UserDict ################################################################################ diff --git a/Lib/configparser.py b/Lib/configparser.py index f1866eb..1bfdac8 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -119,7 +119,7 @@ ConfigParser -- responsible for parsing a list of between keys and values are surrounded by spaces. """ -from collections import MutableMapping, OrderedDict as _default_dict +from collections import MutableMapping, OrderedDict as _default_dict, _ChainMap import functools import io import itertools @@ -1099,23 +1099,24 @@ class RawConfigParser(MutableMapping): return exc def _unify_values(self, section, vars): - """Create a copy of the DEFAULTSECT with values from a specific - `section' and the `vars' dictionary. If provided, values in `vars' - take precendence. + """Create a sequence of lookups with 'vars' taking priority over + the 'section' which takes priority over the DEFAULTSECT. + """ - d = self._defaults.copy() + sectiondict = {} try: - d.update(self._sections[section]) + sectiondict = self._sections[section] except KeyError: if section != self.default_section: raise NoSectionError(section) # Update with the entry specific variables + vardict = {} if vars: for key, value in vars.items(): if value is not None: value = str(value) - d[self.optionxform(key)] = value - return d + vardict[self.optionxform(key)] = value + return _ChainMap(vardict, sectiondict, self._defaults) def _convert_to_boolean(self, value): """Return a boolean value translating from other types if necessary. diff --git a/Misc/NEWS b/Misc/NEWS index 9744ed8..65ae5f9 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -15,6 +15,9 @@ Core and Builtins Library ------- +- Issue #11089: Fix performance issue limiting the use of ConfigParser() + with large config files. 
+ - Issue #10276: Fix the results of zlib.crc32() and zlib.adler32() on buffers larger than 4GB. Patch by Nadeem Vawda. -- cgit v0.12