summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Raymond Hettinger <python@rcn.com> 2011-02-21 19:38:53 (GMT)
committer Raymond Hettinger <python@rcn.com> 2011-02-21 19:38:53 (GMT)
commit e66036063bc2adc5297a6148b19781fa2f1b7db9 (patch)
tree f199ec634fdf8b3d0dc6007c04526a12b99dd242
parent bf709fe08c04e279d77af83f7d9f577087fe86d9 (diff)
download cpython-e66036063bc2adc5297a6148b19781fa2f1b7db9.zip
cpython-e66036063bc2adc5297a6148b19781fa2f1b7db9.tar.gz
cpython-e66036063bc2adc5297a6148b19781fa2f1b7db9.tar.bz2
Issue #11089: Fix performance issue limiting the use of ConfigParser()
with large config files.
-rw-r--r-- Lib/collections.py 91
-rw-r--r-- Lib/configparser.py 17
-rw-r--r-- Misc/NEWS 3
3 files changed, 103 insertions, 8 deletions
diff --git a/Lib/collections.py b/Lib/collections.py
index 89a1588..2f19459 100644
--- a/Lib/collections.py
+++ b/Lib/collections.py
@@ -631,6 +631,97 @@ class Counter(dict):
return result
+########################################################################
+### ChainMap (helper for configparser)
+########################################################################
+
+class _ChainMap(MutableMapping):
+ ''' A ChainMap groups multiple dicts (or other mappings) together
+ to create a single, updateable view.
+
+ The underlying mappings are stored in a list. That list is public and can
+ be accessed or updated using the *maps* attribute. There is no other state.
+
+ Lookups search the underlying mappings successively until a key is found.
+ In contrast, writes, updates, and deletions only operate on the first
+ mapping.
+
+ '''
+
+ def __init__(self, *maps):
+ '''Initialize a ChainMap by setting *maps* to the given mappings.
+ If no mappings are provided, a single empty dictionary is used.
+
+ '''
+ self.maps = list(maps) or [{}] # always at least one map
+
+ def __missing__(self, key):
+ raise KeyError(key)
+
+ def __getitem__(self, key):
+ for mapping in self.maps:
+ try:
+ return mapping[key] # can't use 'key in mapping' with defaultdict
+ except KeyError:
+ pass
+ return self.__missing__(key) # support subclasses that define __missing__
+
+ def get(self, key, default=None):
+ return self[key] if key in self else default
+
+ def __len__(self):
+ return len(set().union(*self.maps)) # reuses stored hash values if possible
+
+ def __iter__(self):
+ return iter(set().union(*self.maps))
+
+ def __contains__(self, key):
+ return any(key in m for m in self.maps)
+
+ @_recursive_repr()
+ def __repr__(self):
+ return '{0.__class__.__name__}({1})'.format(
+ self, ', '.join(map(repr, self.maps)))
+
+ @classmethod
+ def fromkeys(cls, iterable, *args):
+ 'Create a ChainMap with a single dict created from the iterable.'
+ return cls(dict.fromkeys(iterable, *args))
+
+ def copy(self):
+ 'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]'
+ return self.__class__(self.maps[0].copy(), *self.maps[1:])
+
+ __copy__ = copy
+
+ def __setitem__(self, key, value):
+ self.maps[0][key] = value
+
+ def __delitem__(self, key):
+ try:
+ del self.maps[0][key]
+ except KeyError:
+ raise KeyError('Key not found in the first mapping: {!r}'.format(key))
+
+ def popitem(self):
+ 'Remove and return an item pair from maps[0]. Raise KeyError if maps[0] is empty.'
+ try:
+ return self.maps[0].popitem()
+ except KeyError:
+ raise KeyError('No keys found in the first mapping.')
+
+ def pop(self, key, *args):
+ 'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].'
+ try:
+ return self.maps[0].pop(key, *args)
+ except KeyError:
+ raise KeyError('Key not found in the first mapping: {!r}'.format(key))
+
+ def clear(self):
+ 'Clear maps[0], leaving maps[1:] intact.'
+ self.maps[0].clear()
+
+
################################################################################
### UserDict
################################################################################
diff --git a/Lib/configparser.py b/Lib/configparser.py
index f1866eb..1bfdac8 100644
--- a/Lib/configparser.py
+++ b/Lib/configparser.py
@@ -119,7 +119,7 @@ ConfigParser -- responsible for parsing a list of
between keys and values are surrounded by spaces.
"""
-from collections import MutableMapping, OrderedDict as _default_dict
+from collections import MutableMapping, OrderedDict as _default_dict, _ChainMap
import functools
import io
import itertools
@@ -1099,23 +1099,24 @@ class RawConfigParser(MutableMapping):
return exc
def _unify_values(self, section, vars):
- """Create a copy of the DEFAULTSECT with values from a specific
- `section' and the `vars' dictionary. If provided, values in `vars'
- take precendence.
+ """Create a sequence of lookups with 'vars' taking priority over
+ the 'section' which takes priority over the DEFAULTSECT.
+
"""
- d = self._defaults.copy()
+ sectiondict = {}
try:
- d.update(self._sections[section])
+ sectiondict = self._sections[section]
except KeyError:
if section != self.default_section:
raise NoSectionError(section)
# Update with the entry specific variables
+ vardict = {}
if vars:
for key, value in vars.items():
if value is not None:
value = str(value)
- d[self.optionxform(key)] = value
- return d
+ vardict[self.optionxform(key)] = value
+ return _ChainMap(vardict, sectiondict, self._defaults)
def _convert_to_boolean(self, value):
"""Return a boolean value translating from other types if necessary.
diff --git a/Misc/NEWS b/Misc/NEWS
index 16d32b3..9890cb5 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -15,6 +15,9 @@ Core and Builtins
Library
-------
+- Issue #11089: Fix performance issue limiting the use of ConfigParser()
+ with large config files.
+
- Issue #10276: Fix the results of zlib.crc32() and zlib.adler32() on buffers
larger than 4GB. Patch by Nadeem Vawda.