diff options
author | Georg Brandl <georg@python.org> | 2010-07-28 13:13:46 (GMT) |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2010-07-28 13:13:46 (GMT) |
commit | 96a60ae90c291d94c058c80351fa38b6d73eda92 (patch) | |
tree | 68b74d560f5ff935e337ad620636624b203e00ac | |
parent | cbb0ae4a428bae3f5c29a1f8974121bb195299ef (diff) | |
download | cpython-96a60ae90c291d94c058c80351fa38b6d73eda92.zip cpython-96a60ae90c291d94c058c80351fa38b6d73eda92.tar.gz cpython-96a60ae90c291d94c058c80351fa38b6d73eda92.tar.bz2 |
#1682942: add some ConfigParser features: alternate delimiters, alternate comments, empty lines in values. Also enhance the docs with more examples and mention SafeConfigParser before ConfigParser. Patch by Lukas Langa, review by myself, Eric and Ezio.
-rw-r--r-- | Doc/library/configparser.rst | 228 | ||||
-rw-r--r-- | Lib/configparser.py | 318 | ||||
-rw-r--r-- | Lib/test/test_cfgparser.py | 280 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
4 files changed, 547 insertions, 282 deletions
diff --git a/Doc/library/configparser.rst b/Doc/library/configparser.rst index 34a40ee..792784b 100644 --- a/Doc/library/configparser.rst +++ b/Doc/library/configparser.rst @@ -15,105 +15,168 @@ single: ini file single: Windows ini file -This module defines the class :class:`ConfigParser`. The :class:`ConfigParser` -class implements a basic configuration file parser language which provides a -structure similar to what you would find on Microsoft Windows INI files. You -can use this to write Python programs which can be customized by end users -easily. +This module provides the classes :class:`RawConfigParser` and +:class:`SafeConfigParser`. They implement a basic configuration file parser +language which provides a structure similar to what you would find in Microsoft +Windows INI files. You can use this to write Python programs which can be +customized by end users easily. .. note:: This library does *not* interpret or write the value-type prefixes used in the Windows Registry extended version of INI syntax. -The configuration file consists of sections, led by a ``[section]`` header and -followed by ``name: value`` entries, with continuations in the style of -:rfc:`822` (see section 3.1.1, "LONG HEADER FIELDS"); ``name=value`` is also -accepted. Note that leading whitespace is removed from values. The optional -values can contain format strings which refer to other values in the same -section, or values in a special ``DEFAULT`` section. Additional defaults can be -provided on initialization and retrieval. Lines beginning with ``'#'`` or -``';'`` are ignored and may be used to provide comments. +A configuration file consists of sections, each led by a ``[section]`` header, +followed by name/value entries separated by a specific string (``=`` or ``:`` by +default). Note that leading whitespace is removed from values. Values can be +omitted, in which case the key/value delimiter may also be left out. 
Values +can also span multiple lines, as long as they are indented deeper than the first +line of the value. Depending on the parser's mode, blank lines may be treated +as parts of multiline values or ignored. + +Configuration files may include comments, prefixed by specific characters (``#`` +and ``;`` by default). Comments may appear on their own in an otherwise empty +line, or may be entered in lines holding values or section names. In the +latter case, they need to be preceded by a whitespace character to be recognized +as a comment. (For backwards compatibility, by default only ``;`` starts an +inline comment, while ``#`` does not.) + +On top of the core functionality, :class:`SafeConfigParser` supports +interpolation. This means values can contain format strings which refer to +other values in the same section, or values in a special ``DEFAULT`` section. +Additional defaults can be provided on initialization and retrieval. For example:: - [My Section] - foodir: %(dir)s/whatever - dir=frob - long: this value continues - in the next line - -would resolve the ``%(dir)s`` to the value of ``dir`` (``frob`` in this case). -All reference expansions are done on demand. - -Default values can be specified by passing them into the :class:`ConfigParser` -constructor as a dictionary. Additional defaults may be passed into the -:meth:`get` method which will override all others. - -Sections are normally stored in a built-in dictionary. An alternative dictionary -type can be passed to the :class:`ConfigParser` constructor. For example, if a -dictionary type is passed that sorts its keys, the sections will be sorted on -write-back, as will be the keys within each section. - - -.. 
class:: RawConfigParser(defaults=None, dict_type=collections.OrderedDict, - allow_no_value=False) + [Paths] + home_dir: /Users + my_dir: %(home_dir)s/lumberjack + my_pictures: %(my_dir)s/Pictures + + [Multiline Values] + chorus: I'm a lumberjack, and I'm okay + I sleep all night and I work all day + + [No Values] + key_without_value + empty string value here = + + [You can use comments] ; after a useful line + ; in an empty line + after: a_value ; here's another comment + inside: a ;comment + multiline ;comment + value! ;comment + + [Sections Can Be Indented] + can_values_be_as_well = True + does_that_mean_anything_special = False + purpose = formatting for readability + multiline_values = are + handled just fine as + long as they are indented + deeper than the first line + of a value + # Did I mention we can indent comments, too? + + +In the example above, :class:`SafeConfigParser` would resolve ``%(home_dir)s`` +to the value of ``home_dir`` (``/Users`` in this case). ``%(my_dir)s`` in +effect would resolve to ``/Users/lumberjack``. All interpolations are done on +demand so keys used in the chain of references do not have to be specified in +any specific order in the configuration file. + +:class:`RawConfigParser` would simply return ``%(my_dir)s/Pictures`` as the +value of ``my_pictures`` and ``%(home_dir)s/lumberjack`` as the value of +``my_dir``. Other features presented in the example are handled in the same +manner by both parsers. + +Default values can be specified by passing them as a dictionary when +constructing the :class:`SafeConfigParser`. Additional defaults may be passed +to the :meth:`get` method which will override all others. + +Sections are normally stored in an :class:`collections.OrderedDict` which +maintains the order of all keys. An alternative dictionary type can be passed +to the :meth:`__init__` method. 
For example, if a dictionary type is passed +that sorts its keys, the sections will be sorted on write-back, as will be the +keys within each section. + + +.. class:: RawConfigParser(defaults=None, dict_type=collections.OrderedDict, delimiters=('=', ':'), comment_prefixes=_COMPATIBLE, empty_lines_in_values=True, allow_no_value=False) The basic configuration object. When *defaults* is given, it is initialized - into the dictionary of intrinsic defaults. When *dict_type* is given, it will - be used to create the dictionary objects for the list of sections, for the - options within a section, and for the default values. When *allow_no_value* - is true (default: ``False``), options without values are accepted; the value + into the dictionary of intrinsic defaults. When *dict_type* is given, it + will be used to create the dictionary objects for the list of sections, for + the options within a section, and for the default values. + + When *delimiters* is given, it will be used as the set of substrings that + divide keys from values. When *comment_prefixes* is given, it will be used + as the set of substrings that prefix comments in a line, both for the whole + line and inline comments. For backwards compatibility, the default value for + *comment_prefixes* is a special value that indicates that ``;`` and ``#`` can + start whole line comments while only ``;`` can start inline comments. + + When *empty_lines_in_values* is ``False`` (default: ``True``), each empty + line marks the end of an option. Otherwise, internal empty lines of a + multiline option are kept as part of the value. When *allow_no_value* is + true (default: ``False``), options without values are accepted; the value presented for these is ``None``. - This class does not - support the magical interpolation behavior. + This class does not support the magical interpolation behavior. .. versionchanged:: 3.1 The default *dict_type* is :class:`collections.OrderedDict`. .. 
versionchanged:: 3.2 - *allow_no_value* was added. + *delimiters*, *comment_prefixes*, *empty_lines_in_values* and + *allow_no_value* were added. -.. class:: ConfigParser(defaults=None, dict_type=collections.OrderedDict, - allow_no_value=False) +.. class:: SafeConfigParser(defaults=None, dict_type=collections.OrderedDict, delimiters=('=', ':'), comment_prefixes=('#', ';'), empty_lines_in_values=True, allow_no_value=False) - Derived class of :class:`RawConfigParser` that implements the magical - interpolation feature and adds optional arguments to the :meth:`get` and - :meth:`items` methods. The values in *defaults* must be appropriate for the - ``%()s`` string interpolation. Note that *__name__* is an intrinsic default; - its value is the section name, and will override any value provided in - *defaults*. + Derived class of :class:`ConfigParser` that implements a sane variant of the + magical interpolation feature. This implementation is more predictable as it + validates the interpolation syntax used within a configuration file. This + class also enables escaping the interpolation character (e.g. a key can have + ``%`` as part of the value by specifying ``%%`` in the file). - All option names used in interpolation will be passed through the - :meth:`optionxform` method just like any other option name reference. For - example, using the default implementation of :meth:`optionxform` (which converts - option names to lower case), the values ``foo %(bar)s`` and ``foo %(BAR)s`` are - equivalent. + Applications that don't require interpolation should use + :class:`RawConfigParser`, otherwise :class:`SafeConfigParser` is the best + option. .. versionchanged:: 3.1 The default *dict_type* is :class:`collections.OrderedDict`. .. versionchanged:: 3.2 - *allow_no_value* was added. + *delimiters*, *comment_prefixes*, *empty_lines_in_values* and + *allow_no_value* were added. + +.. 
class:: ConfigParser(defaults=None, dict_type=collections.OrderedDict, delimiters=('=', ':'), comment_prefixes=('#', ';'), empty_lines_in_values=True, allow_no_value=False) + + Derived class of :class:`RawConfigParser` that implements the magical + interpolation feature and adds optional arguments to the :meth:`get` and + :meth:`items` methods. -.. class:: SafeConfigParser(defaults=None, dict_type=collections.OrderedDict, - allow_no_value=False) + :class:`SafeConfigParser` is generally recommended over this class if you + need interpolation. - Derived class of :class:`ConfigParser` that implements a more-sane variant of - the magical interpolation feature. This implementation is more predictable as - well. New applications should prefer this version if they don't need to be - compatible with older versions of Python. + The values in *defaults* must be appropriate for the ``%()s`` string + interpolation. Note that *__name__* is an intrinsic default; its value is + the section name, and will override any value provided in *defaults*. - .. XXX Need to explain what's safer/more predictable about it. + All option names used in interpolation will be passed through the + :meth:`optionxform` method just like any other option name reference. For + example, using the default implementation of :meth:`optionxform` (which + converts option names to lower case), the values ``foo %(bar)s`` and ``foo + %(BAR)s`` are equivalent. .. versionchanged:: 3.1 The default *dict_type* is :class:`collections.OrderedDict`. .. versionchanged:: 3.2 - *allow_no_value* was added. + *delimiters*, *comment_prefixes*, *empty_lines_in_values* and + *allow_no_value* were added. .. exception:: NoSectionError @@ -295,11 +358,13 @@ RawConfigParser Objects interpolation and output to files) can only be achieved using string values. -.. method:: RawConfigParser.write(fileobject) +.. 
method:: RawConfigParser.write(fileobject, space_around_delimiters=True) Write a representation of the configuration to the specified file object, which must be opened in text mode (accepting strings). This representation - can be parsed by a future :meth:`read` call. + can be parsed by a future :meth:`read` call. If ``space_around_delimiters`` + is ``True`` (the default), delimiters between keys and values are surrounded + by spaces. .. method:: RawConfigParser.remove_option(section, option) @@ -342,21 +407,24 @@ ConfigParser Objects -------------------- The :class:`ConfigParser` class extends some methods of the -:class:`RawConfigParser` interface, adding some optional arguments. +:class:`RawConfigParser` interface, adding some optional arguments. Whenever you +can, consider using :class:`SafeConfigParser` which adds validation and escaping +for the interpolation. .. method:: ConfigParser.get(section, option, raw=False, vars=None) - Get an *option* value for the named *section*. All the ``'%'`` interpolations - are expanded in the return values, based on the defaults passed into the - constructor, as well as the options *vars* provided, unless the *raw* argument - is true. + Get an *option* value for the named *section*. All the ``'%'`` + interpolations are expanded in the return values, based on the defaults + passed into the :meth:`__init__` method, as well as the options *vars* + provided, unless the *raw* argument is true. .. method:: ConfigParser.items(section, raw=False, vars=None) - Return a list of ``(name, value)`` pairs for each option in the given *section*. - Optional arguments have the same meaning as for the :meth:`get` method. + Return a list of ``(name, value)`` pairs for each option in the given + *section*. Optional arguments have the same meaning as for the :meth:`get` + method. .. 
_safeconfigparser-objects: @@ -466,8 +534,8 @@ The function ``opt_move`` below can be used to move options between sections:: Some configuration files are known to include settings without values, but which otherwise conform to the syntax supported by :mod:`configparser`. The -*allow_no_value* parameter to the constructor can be used to indicate that such -values should be accepted: +*allow_no_value* parameter to the :meth:`__init__` method can be used to +indicate that such values should be accepted: .. doctest:: @@ -476,12 +544,12 @@ values should be accepted: >>> sample_config = """ ... [mysqld] - ... user = mysql - ... pid-file = /var/run/mysqld/mysqld.pid - ... skip-external-locking - ... old_passwords = 1 - ... skip-bdb - ... skip-innodb + ... user = mysql + ... pid-file = /var/run/mysqld/mysqld.pid + ... skip-external-locking + ... old_passwords = 1 + ... skip-bdb + ... skip-innodb # we don't need ACID today ... """ >>> config = configparser.RawConfigParser(allow_no_value=True) >>> config.readfp(io.BytesIO(sample_config)) diff --git a/Lib/configparser.py b/Lib/configparser.py index 2fbedf8..d979e6c 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -1,6 +1,6 @@ """Configuration file parser. -A setup file consists of sections, lead by a "[section]" header, +A configuration file consists of sections, lead by a "[section]" header, and followed by "name: value" entries, with continuations and such in the style of RFC 822. @@ -24,67 +24,88 @@ ConfigParser -- responsible for parsing a list of methods: - __init__(defaults=None) - create the parser and specify a dictionary of intrinsic defaults. The - keys must be strings, the values must be appropriate for %()s string - interpolation. Note that `__name__' is always an intrinsic default; - its value is the section's name. + __init__(defaults=None, dict_type=_default_dict, + delimiters=('=', ':'), comment_prefixes=('#', ';'), + empty_lines_in_values=True, allow_no_value=False): + Create the parser. 
When `defaults' is given, it is initialized into the + dictionary of intrinsic defaults. The keys must be strings, the values + must be appropriate for %()s string interpolation. Note that `__name__' + is always an intrinsic default; its value is the section's name. + + When `dict_type' is given, it will be used to create the dictionary + objects for the list of sections, for the options within a section, and + for the default values. + + When `delimiters' is given, it will be used as the set of substrings + that divide keys from values. + + When `comment_prefixes' is given, it will be used as the set of + substrings that prefix comments in a line. + + When `empty_lines_in_values' is False (default: True), each empty line + marks the end of an option. Otherwise, internal empty lines of + a multiline option are kept as part of the value. + + When `allow_no_value' is True (default: False), options without + values are accepted; the value presented for these is None. sections() - return all the configuration section names, sans DEFAULT + Return all the configuration section names, sans DEFAULT. has_section(section) - return whether the given section exists + Return whether the given section exists. has_option(section, option) - return whether the given option exists in the given section + Return whether the given option exists in the given section. options(section) - return list of configuration options for the named section + Return list of configuration options for the named section. read(filenames) - read and parse the list of named configuration files, given by + Read and parse the list of named configuration files, given by name. A single filename is also allowed. Non-existing files are ignored. Return list of successfully read files. readfp(fp, filename=None) - read and parse one configuration file, given as a file object. + Read and parse one configuration file, given as a file object. 
The filename defaults to fp.name; it is only used in error messages (if fp has no `name' attribute, the string `<???>' is used). get(section, option, raw=False, vars=None) - return a string value for the named option. All % interpolations are + Return a string value for the named option. All % interpolations are expanded in the return values, based on the defaults passed into the constructor and the DEFAULT section. Additional substitutions may be provided using the `vars' argument, which must be a dictionary whose contents override any pre-existing defaults. getint(section, options) - like get(), but convert value to an integer + Like get(), but convert value to an integer. getfloat(section, options) - like get(), but convert value to a float + Like get(), but convert value to a float. getboolean(section, options) - like get(), but convert value to a boolean (currently case + Like get(), but convert value to a boolean (currently case insensitively defined as 0, false, no, off for False, and 1, true, yes, on for True). Returns False or True. items(section, raw=False, vars=None) - return a list of tuples with (name, value) for each option + Return a list of tuples with (name, value) for each option in the section. remove_section(section) - remove the given file section and all its options + Remove the given file section and all its options. remove_option(section, option) - remove the given option from the given section + Remove the given option from the given section. set(section, option, value) - set the given option + Set the given option. - write(fp) - write the configuration state in .ini format + write(fp, space_around_delimiters=True) + Write the configuration state in .ini format. If + `space_around_delimiters' is True (the default), delimiters + between keys and values are surrounded by spaces. 
""" try: @@ -94,6 +115,7 @@ except ImportError: _default_dict = dict import re +import sys __all__ = ["NoSectionError", "DuplicateSectionError", "NoOptionError", "InterpolationError", "InterpolationDepthError", @@ -114,17 +136,19 @@ class Error(Exception): def _get_message(self): """Getter for 'message'; needed only to override deprecation in - BaseException.""" + BaseException. + """ return self.__message def _set_message(self, value): """Setter for 'message'; needed only to override deprecation in - BaseException.""" + BaseException. + """ self.__message = value # BaseException.message has been deprecated since Python 2.6. To prevent - # DeprecationWarning from popping up over this pre-existing attribute, use - # a new property that takes lookup precedence. + # DeprecationWarning from popping up over this pre-existing attribute, use a + # new property that takes lookup precedence. message = property(_get_message, _set_message) def __init__(self, msg=''): @@ -136,6 +160,7 @@ class Error(Exception): __str__ = __repr__ + class NoSectionError(Error): """Raised when no section matches a requested option.""" @@ -144,6 +169,7 @@ class NoSectionError(Error): self.section = section self.args = (section, ) + class DuplicateSectionError(Error): """Raised when a section is multiply-created.""" @@ -152,6 +178,7 @@ class DuplicateSectionError(Error): self.section = section self.args = (section, ) + class NoOptionError(Error): """A requested option was not found.""" @@ -162,6 +189,7 @@ class NoOptionError(Error): self.section = section self.args = (option, section) + class InterpolationError(Error): """Base class for interpolation-related exceptions.""" @@ -171,6 +199,7 @@ class InterpolationError(Error): self.section = section self.args = (option, section, msg) + class InterpolationMissingOptionError(InterpolationError): """A string substitution required a setting which was not available.""" @@ -185,10 +214,12 @@ class InterpolationMissingOptionError(InterpolationError): 
self.reference = reference self.args = (option, section, rawval, reference) + class InterpolationSyntaxError(InterpolationError): """Raised when the source text into which substitutions are made does not conform to the required syntax.""" + class InterpolationDepthError(InterpolationError): """Raised when substitutions are nested too deeply.""" @@ -201,6 +232,7 @@ class InterpolationDepthError(InterpolationError): InterpolationError.__init__(self, option, section, msg) self.args = (option, section, rawval) + class ParsingError(Error): """Raised when a configuration file does not follow legal syntax.""" @@ -214,6 +246,7 @@ class ParsingError(Error): self.errors.append((lineno, line)) self.message += '\n\t[line %2d]: %s' % (lineno, line) + class MissingSectionHeaderError(ParsingError): """Raised when a key-value pair is found before any section header.""" @@ -227,19 +260,74 @@ class MissingSectionHeaderError(ParsingError): self.line = line self.args = (filename, lineno, line) + class RawConfigParser: + """ConfigParser that does not do interpolation.""" + + # Regular expressions for parsing section headers and options + _SECT_TMPL = r""" + \[ # [ + (?P<header>[^]]+) # very permissive! + \] # ] + """ + _OPT_TMPL = r""" + (?P<option>.*?) # very permissive! + \s*(?P<vi>{delim})\s* # any number of space/tab, + # followed by any of the + # allowed delimiters, + # followed by any space/tab + (?P<value>.*)$ # everything up to eol + """ + _OPT_NV_TMPL = r""" + (?P<option>.*?) # very permissive! 
+ \s*(?: # any number of space/tab, + (?P<vi>{delim})\s* # optionally followed by + # any of the allowed + # delimiters, followed by any + # space/tab + (?P<value>.*))?$ # everything up to eol + """ + + # Compiled regular expression for matching sections + SECTCRE = re.compile(_SECT_TMPL, re.VERBOSE) + # Compiled regular expression for matching options with typical separators + OPTCRE = re.compile(_OPT_TMPL.format(delim="=|:"), re.VERBOSE) + # Compiled regular expression for matching options with optional values + # delimited using typical separators + OPTCRE_NV = re.compile(_OPT_NV_TMPL.format(delim="=|:"), re.VERBOSE) + # Compiled regular expression for matching leading whitespace in a line + NONSPACECRE = re.compile(r"\S") + # Select backwards-compatible inline comment character behavior + # (; and # are comments at the start of a line, but ; only inline) + _COMPATIBLE = object() + def __init__(self, defaults=None, dict_type=_default_dict, - allow_no_value=False): + delimiters=('=', ':'), comment_prefixes=_COMPATIBLE, + empty_lines_in_values=True, allow_no_value=False): self._dict = dict_type self._sections = self._dict() self._defaults = self._dict() - if allow_no_value: - self._optcre = self.OPTCRE_NV - else: - self._optcre = self.OPTCRE if defaults: for key, value in defaults.items(): self._defaults[self.optionxform(key)] = value + self._delimiters = tuple(delimiters) + if delimiters == ('=', ':'): + self._optcre = self.OPTCRE_NV if allow_no_value else self.OPTCRE + else: + delim = "|".join(re.escape(d) for d in delimiters) + if allow_no_value: + self._optcre = re.compile(self._OPT_NV_TMPL.format(delim=delim), + re.VERBOSE) + else: + self._optcre = re.compile(self._OPT_TMPL.format(delim=delim), + re.VERBOSE) + if comment_prefixes is self._COMPATIBLE: + self._startonly_comment_prefixes = ('#',) + self._comment_prefixes = (';',) + else: + self._startonly_comment_prefixes = () + self._comment_prefixes = tuple(comment_prefixes or ()) + self._empty_lines_in_values 
= empty_lines_in_values def defaults(self): return self._defaults @@ -313,7 +401,6 @@ class RawConfigParser: second argument is the `filename', which if not given, is taken from fp.name. If fp has no `name' attribute, `<???>' is used. - """ if filename is None: try: @@ -374,6 +461,7 @@ class RawConfigParser: def has_option(self, section, option): """Check for the existence of a given option in a given section.""" + if not section or section == DEFAULTSECT: option = self.optionxform(option) return option in self._defaults @@ -386,6 +474,7 @@ class RawConfigParser: def set(self, section, option, value=None): """Set an option.""" + if not section or section == DEFAULTSECT: sectdict = self._defaults else: @@ -395,22 +484,34 @@ class RawConfigParser: raise NoSectionError(section) sectdict[self.optionxform(option)] = value - def write(self, fp): - """Write an .ini-format representation of the configuration state.""" + def write(self, fp, space_around_delimiters=True): + """Write an .ini-format representation of the configuration state. + + If `space_around_delimiters' is True (the default), delimiters + between keys and values are surrounded by spaces. 
+ """ + if space_around_delimiters: + d = " {} ".format(self._delimiters[0]) + else: + d = self._delimiters[0] if self._defaults: - fp.write("[%s]\n" % DEFAULTSECT) - for (key, value) in self._defaults.items(): - fp.write("%s = %s\n" % (key, str(value).replace('\n', '\n\t'))) - fp.write("\n") + self._write_section(fp, DEFAULTSECT, self._defaults.items(), d) for section in self._sections: - fp.write("[%s]\n" % section) - for (key, value) in self._sections[section].items(): - if key == "__name__": - continue - if value is not None: - key = " = ".join((key, str(value).replace('\n', '\n\t'))) - fp.write("%s\n" % (key)) - fp.write("\n") + self._write_section(fp, section, + self._sections[section].items(), d) + + def _write_section(self, fp, section_name, section_items, delimiter): + """Write a single section to the specified `fp'.""" + fp.write("[{}]\n".format(section_name)) + for key, value in section_items: + if key == "__name__": + continue + if value is not None: + value = delimiter + str(value).replace('\n', '\n\t') + else: + value = "" + fp.write("{}{}\n".format(key, value)) + fp.write("\n") def remove_option(self, section, option): """Remove an option.""" @@ -434,66 +535,63 @@ class RawConfigParser: del self._sections[section] return existed - # - # Regular expressions for parsing section headers and options. - # - SECTCRE = re.compile( - r'\[' # [ - r'(?P<header>[^]]+)' # very permissive! - r'\]' # ] - ) - OPTCRE = re.compile( - r'(?P<option>[^:=\s][^:=]*)' # very permissive! - r'\s*(?P<vi>[:=])\s*' # any number of space/tab, - # followed by separator - # (either : or =), followed - # by any # space/tab - r'(?P<value>.*)$' # everything up to eol - ) - OPTCRE_NV = re.compile( - r'(?P<option>[^:=\s][^:=]*)' # very permissive! 
- r'\s*(?:' # any number of space/tab, - r'(?P<vi>[:=])\s*' # optionally followed by - # separator (either : or - # =), followed by any # - # space/tab - r'(?P<value>.*))?$' # everything up to eol - ) - def _read(self, fp, fpname): - """Parse a sectioned setup file. - - The sections in setup file contains a title line at the top, - indicated by a name in square brackets (`[]'), plus key/value - options lines, indicated by `name: value' format lines. - Continuations are represented by an embedded newline then - leading whitespace. Blank lines, lines beginning with a '#', - and just about everything else are ignored. + """Parse a sectioned configuration file. + + Each section in a configuration file contains a header, indicated by a + name in square brackets (`[]'), plus key/value options, indicated by + `name' and `value' delimited with a specific substring (`=' or `:' by + default). + + Values can span multiple lines, as long as they are indented deeper than + the first line of the value. Depending on the parser's mode, blank lines + may be treated as parts of multiline values or ignored. + + Configuration files may include comments, prefixed by specific + characters (`#' and `;' by default). Comments may appear on their own in + an otherwise empty line or may be entered in lines holding values or + section names. """ cursect = None # None, or a dictionary optname = None lineno = 0 + indent_level = 0 e = None # None, or an exception - while True: - line = fp.readline() - if not line: - break - lineno = lineno + 1 - # comment or blank line? 
- if line.strip() == '' or line[0] in '#;': - continue - if line.split(None, 1)[0].lower() == 'rem' and line[0] in "rR": - # no leading whitespace + for lineno, line in enumerate(fp, start=1): + # strip prefix-only comments + comment_start = None + for prefix in self._startonly_comment_prefixes: + if line.strip().startswith(prefix): + comment_start = 0 + break + # strip inline comments + for prefix in self._comment_prefixes: + index = line.find(prefix) + if index == 0 or (index > 0 and line[index-1].isspace()): + comment_start = index + break + value = line[:comment_start].strip() + if not value: + if self._empty_lines_in_values and comment_start is None: + # add empty line to the value, but only if there was no + # comment on the line + if cursect is not None and optname: + cursect[optname].append('\n') + else: + # empty line marks end of value + indent_level = sys.maxsize continue # continuation line? - if line[0].isspace() and cursect is not None and optname: - value = line.strip() - if value: - cursect[optname].append(value) + first_nonspace = self.NONSPACECRE.search(line) + cur_indent_level = first_nonspace.start() if first_nonspace else 0 + if (cursect is not None and optname and + cur_indent_level > indent_level): + cursect[optname].append(value) # a section header or option header? else: + indent_level = cur_indent_level # is it a section header? - mo = self.SECTCRE.match(line) + mo = self.SECTCRE.match(value) if mo: sectname = mo.group('header') if sectname in self._sections: @@ -511,19 +609,15 @@ class RawConfigParser: raise MissingSectionHeaderError(fpname, lineno, line) # an option line? 
else: - mo = self._optcre.match(line) + mo = self._optcre.match(value) if mo: optname, vi, optval = mo.group('option', 'vi', 'value') + if not optname: + e = self._handle_error(e, fpname, lineno, line) optname = self.optionxform(optname.rstrip()) # This check is fine because the OPTCRE cannot # match if it would set optval to None if optval is not None: - if vi in ('=', ':') and ';' in optval: - # ';' is a comment delimiter only if it follows - # a spacing character - pos = optval.find(';') - if pos != -1 and optval[pos-1].isspace(): - optval = optval[:pos] optval = optval.strip() # allow empty values if optval == '""': @@ -533,26 +627,35 @@ class RawConfigParser: # valueless option handling cursect[optname] = optval else: - # a non-fatal parsing error occurred. set up the + # a non-fatal parsing error occurred. set up the # exception but keep going. the exception will be # raised at the end of the file and will contain a # list of all bogus lines - if not e: - e = ParsingError(fpname) - e.append(lineno, repr(line)) + e = self._handle_error(e, fpname, lineno, line) # if any parsing errors occurred, raise an exception if e: raise e + self._join_multiline_values() - # join the multi-line values collected while reading + def _join_multiline_values(self): all_sections = [self._defaults] all_sections.extend(self._sections.values()) for options in all_sections: for name, val in options.items(): if isinstance(val, list): + if val[-1] == '\n': + val = val[:-1] options[name] = '\n'.join(val) + def _handle_error(self, exc, fpname, lineno, line): + if not exc: + exc = ParsingError(fpname) + exc.append(lineno, repr(line)) + return exc + + class ConfigParser(RawConfigParser): + """ConfigParser implementing interpolation.""" def get(self, section, option, raw=False, vars=None): """Get an option value for a given section. 
@@ -648,6 +751,7 @@ class ConfigParser(RawConfigParser): class SafeConfigParser(ConfigParser): + """ConfigParser implementing sane interpolation.""" def _interpolate(self, section, option, rawval, vars): # do the string interpolation diff --git a/Lib/test/test_cfgparser.py b/Lib/test/test_cfgparser.py index c5a2595..e00a51a 100644 --- a/Lib/test/test_cfgparser.py +++ b/Lib/test/test_cfgparser.py @@ -3,6 +3,7 @@ import configparser import io import os import unittest +import textwrap from test import support @@ -23,15 +24,26 @@ class SortedDict(collections.UserDict): def itervalues(self): return iter(self.values()) -class TestCaseBase(unittest.TestCase): +class CfgParserTestCaseClass(unittest.TestCase): allow_no_value = False + delimiters = ('=', ':') + comment_prefixes = (';', '#') + empty_lines_in_values = True + dict_type = configparser._default_dict def newconfig(self, defaults=None): + arguments = dict( + allow_no_value=self.allow_no_value, + delimiters=self.delimiters, + comment_prefixes=self.comment_prefixes, + empty_lines_in_values=self.empty_lines_in_values, + dict_type=self.dict_type, + ) if defaults is None: - self.cf = self.config_class(allow_no_value=self.allow_no_value) + self.cf = self.config_class(**arguments) else: self.cf = self.config_class(defaults, - allow_no_value=self.allow_no_value) + **arguments) return self.cf def fromstring(self, string, defaults=None): @@ -40,27 +52,33 @@ class TestCaseBase(unittest.TestCase): cf.readfp(sio) return cf +class BasicTestCase(CfgParserTestCaseClass): + def test_basic(self): - config_string = ( - "[Foo Bar]\n" - "foo=bar\n" - "[Spacey Bar]\n" - "foo = bar\n" - "[Commented Bar]\n" - "foo: bar ; comment\n" - "[Long Line]\n" - "foo: this line is much, much longer than my editor\n" - " likes it.\n" - "[Section\\with$weird%characters[\t]\n" - "[Internationalized Stuff]\n" - "foo[bg]: Bulgarian\n" - "foo=Default\n" - "foo[en]=English\n" - "foo[de]=Deutsch\n" - "[Spaces]\n" - "key with spaces : value\n" - "another 
with spaces = splat!\n" - ) + config_string = """\ +[Foo Bar] +foo{0[0]}bar +[Spacey Bar] +foo {0[0]} bar +[Spacey Bar From The Beginning] + foo {0[0]} bar + baz {0[0]} qwe +[Commented Bar] +foo{0[1]} bar {1[1]} comment +baz{0[0]}qwe {1[0]}another one +[Long Line] +foo{0[1]} this line is much, much longer than my editor + likes it. +[Section\\with$weird%characters[\t] +[Internationalized Stuff] +foo[bg]{0[1]} Bulgarian +foo{0[0]}Default +foo[en]{0[0]}English +foo[de]{0[0]}Deutsch +[Spaces] +key with spaces {0[1]} value +another with spaces {0[0]} splat! +""".format(self.delimiters, self.comment_prefixes) if self.allow_no_value: config_string += ( "[NoValue]\n" @@ -70,13 +88,14 @@ class TestCaseBase(unittest.TestCase): cf = self.fromstring(config_string) L = cf.sections() L.sort() - E = [r'Commented Bar', - r'Foo Bar', - r'Internationalized Stuff', - r'Long Line', - r'Section\with$weird%characters[' '\t', - r'Spaces', - r'Spacey Bar', + E = ['Commented Bar', + 'Foo Bar', + 'Internationalized Stuff', + 'Long Line', + 'Section\\with$weird%characters[\t', + 'Spaces', + 'Spacey Bar', + 'Spacey Bar From The Beginning', ] if self.allow_no_value: E.append(r'NoValue') @@ -89,7 +108,10 @@ class TestCaseBase(unittest.TestCase): # http://www.python.org/sf/583248 eq(cf.get('Foo Bar', 'foo'), 'bar') eq(cf.get('Spacey Bar', 'foo'), 'bar') + eq(cf.get('Spacey Bar From The Beginning', 'foo'), 'bar') + eq(cf.get('Spacey Bar From The Beginning', 'baz'), 'qwe') eq(cf.get('Commented Bar', 'foo'), 'bar') + eq(cf.get('Commented Bar', 'baz'), 'qwe') eq(cf.get('Spaces', 'key with spaces'), 'value') eq(cf.get('Spaces', 'another with spaces'), 'splat!') if self.allow_no_value: @@ -140,12 +162,14 @@ class TestCaseBase(unittest.TestCase): # SF bug #432369: cf = self.fromstring( - "[MySection]\nOption: first line\n\tsecond line\n") + "[MySection]\nOption{} first line\n\tsecond line\n".format( + self.delimiters[0])) eq(cf.options("MySection"), ["option"]) eq(cf.get("MySection", "Option"), "first 
line\nsecond line") # SF bug #561822: - cf = self.fromstring("[section]\nnekey=nevalue\n", + cf = self.fromstring("[section]\n" + "nekey{}nevalue\n".format(self.delimiters[0]), defaults={"key":"value"}) self.assertTrue(cf.has_option("section", "Key")) @@ -162,18 +186,19 @@ class TestCaseBase(unittest.TestCase): def test_parse_errors(self): self.newconfig() - e = self.parse_error(configparser.ParsingError, - "[Foo]\n extra-spaces: splat\n") - self.assertEqual(e.args, ('<???>',)) - self.parse_error(configparser.ParsingError, - "[Foo]\n extra-spaces= splat\n") self.parse_error(configparser.ParsingError, - "[Foo]\n:value-without-option-name\n") + "[Foo]\n" + "{}val-without-opt-name\n".format(self.delimiters[0])) self.parse_error(configparser.ParsingError, - "[Foo]\n=value-without-option-name\n") + "[Foo]\n" + "{}val-without-opt-name\n".format(self.delimiters[1])) e = self.parse_error(configparser.MissingSectionHeaderError, "No Section!\n") self.assertEqual(e.args, ('<???>', 1, "No Section!\n")) + if not self.allow_no_value: + e = self.parse_error(configparser.ParsingError, + "[Foo]\n wrong-indent\n") + self.assertEqual(e.args, ('<???>',)) def parse_error(self, exc, src): sio = io.StringIO(src) @@ -188,9 +213,9 @@ class TestCaseBase(unittest.TestCase): self.assertFalse(cf.has_section("Foo"), "new ConfigParser should have no acknowledged " "sections") - with self.assertRaises(configparser.NoSectionError) as cm: + with self.assertRaises(configparser.NoSectionError): cf.options("Foo") - with self.assertRaises(configparser.NoSectionError) as cm: + with self.assertRaises(configparser.NoSectionError): cf.set("foo", "bar", "value") e = self.get_error(configparser.NoSectionError, "foo", "bar") self.assertEqual(e.args, ("foo",)) @@ -210,21 +235,21 @@ class TestCaseBase(unittest.TestCase): def test_boolean(self): cf = self.fromstring( "[BOOLTEST]\n" - "T1=1\n" - "T2=TRUE\n" - "T3=True\n" - "T4=oN\n" - "T5=yes\n" - "F1=0\n" - "F2=FALSE\n" - "F3=False\n" - "F4=oFF\n" - "F5=nO\n" - 
"E1=2\n" - "E2=foo\n" - "E3=-1\n" - "E4=0.1\n" - "E5=FALSE AND MORE" + "T1{equals}1\n" + "T2{equals}TRUE\n" + "T3{equals}True\n" + "T4{equals}oN\n" + "T5{equals}yes\n" + "F1{equals}0\n" + "F2{equals}FALSE\n" + "F3{equals}False\n" + "F4{equals}oFF\n" + "F5{equals}nO\n" + "E1{equals}2\n" + "E2{equals}foo\n" + "E3{equals}-1\n" + "E4{equals}0.1\n" + "E5{equals}FALSE AND MORE".format(equals=self.delimiters[0]) ) for x in range(1, 5): self.assertTrue(cf.getboolean('BOOLTEST', 't%d' % x)) @@ -242,11 +267,17 @@ class TestCaseBase(unittest.TestCase): def test_write(self): config_string = ( "[Long Line]\n" - "foo: this line is much, much longer than my editor\n" + "foo{0[0]} this line is much, much longer than my editor\n" " likes it.\n" "[DEFAULT]\n" - "foo: another very\n" + "foo{0[1]} another very\n" " long line\n" + "[Long Line - With Comments!]\n" + "test {0[1]} we {comment} can\n" + " also {comment} place\n" + " comments {comment} in\n" + " multiline {comment} values" + "\n".format(self.delimiters, comment=self.comment_prefixes[0]) ) if self.allow_no_value: config_string += ( @@ -259,13 +290,19 @@ class TestCaseBase(unittest.TestCase): cf.write(output) expect_string = ( "[DEFAULT]\n" - "foo = another very\n" + "foo {equals} another very\n" "\tlong line\n" "\n" "[Long Line]\n" - "foo = this line is much, much longer than my editor\n" + "foo {equals} this line is much, much longer than my editor\n" "\tlikes it.\n" "\n" + "[Long Line - With Comments!]\n" + "test {equals} we\n" + "\talso\n" + "\tcomments\n" + "\tmultiline\n" + "\n".format(equals=self.delimiters[0]) ) if self.allow_no_value: expect_string += ( @@ -277,7 +314,7 @@ class TestCaseBase(unittest.TestCase): def test_set_string_types(self): cf = self.fromstring("[sect]\n" - "option1=foo\n") + "option1{eq}foo\n".format(eq=self.delimiters[0])) # Check that we don't get an exception when setting values in # an existing section using strings: class mystr(str): @@ -290,6 +327,9 @@ class TestCaseBase(unittest.TestCase): 
cf.set("sect", "option2", "splat") def test_read_returns_file_list(self): + if self.delimiters[0] != '=': + # skip reading the file if we're using an incompatible format + return file1 = support.findfile("cfgparser.1") # check when we pass a mix of readable and non-readable files: cf = self.newconfig() @@ -314,45 +354,45 @@ class TestCaseBase(unittest.TestCase): def get_interpolation_config(self): return self.fromstring( "[Foo]\n" - "bar=something %(with1)s interpolation (1 step)\n" - "bar9=something %(with9)s lots of interpolation (9 steps)\n" - "bar10=something %(with10)s lots of interpolation (10 steps)\n" - "bar11=something %(with11)s lots of interpolation (11 steps)\n" - "with11=%(with10)s\n" - "with10=%(with9)s\n" - "with9=%(with8)s\n" - "with8=%(With7)s\n" - "with7=%(WITH6)s\n" - "with6=%(with5)s\n" - "With5=%(with4)s\n" - "WITH4=%(with3)s\n" - "with3=%(with2)s\n" - "with2=%(with1)s\n" - "with1=with\n" + "bar{equals}something %(with1)s interpolation (1 step)\n" + "bar9{equals}something %(with9)s lots of interpolation (9 steps)\n" + "bar10{equals}something %(with10)s lots of interpolation (10 steps)\n" + "bar11{equals}something %(with11)s lots of interpolation (11 steps)\n" + "with11{equals}%(with10)s\n" + "with10{equals}%(with9)s\n" + "with9{equals}%(with8)s\n" + "with8{equals}%(With7)s\n" + "with7{equals}%(WITH6)s\n" + "with6{equals}%(with5)s\n" + "With5{equals}%(with4)s\n" + "WITH4{equals}%(with3)s\n" + "with3{equals}%(with2)s\n" + "with2{equals}%(with1)s\n" + "with1{equals}with\n" "\n" "[Mutual Recursion]\n" - "foo=%(bar)s\n" - "bar=%(foo)s\n" + "foo{equals}%(bar)s\n" + "bar{equals}%(foo)s\n" "\n" "[Interpolation Error]\n" - "name=%(reference)s\n", + "name{equals}%(reference)s\n".format(equals=self.delimiters[0]), # no definition for 'reference' defaults={"getname": "%(__name__)s"}) def check_items_config(self, expected): cf = self.fromstring( "[section]\n" - "name = value\n" - "key: |%(name)s| \n" - "getdefault: |%(default)s|\n" - "getname: 
|%(__name__)s|", + "name {0[0]} value\n" + "key{0[1]} |%(name)s| \n" + "getdefault{0[1]} |%(default)s|\n" + "getname{0[1]} |%(__name__)s|".format(self.delimiters), defaults={"default": "<default>"}) L = list(cf.items("section")) L.sort() self.assertEqual(L, expected) -class ConfigParserTestCase(TestCaseBase): +class ConfigParserTestCase(BasicTestCase): config_class = configparser.ConfigParser def test_interpolation(self): @@ -414,7 +454,11 @@ class ConfigParserTestCase(TestCaseBase): self.assertRaises(ValueError, cf.get, 'non-string', 'string_with_interpolation', raw=False) -class MultilineValuesTestCase(TestCaseBase): +class ConfigParserTestCaseNonStandardDelimiters(ConfigParserTestCase): + delimiters = (':=', '$') + comment_prefixes = ('//', '"') + +class MultilineValuesTestCase(BasicTestCase): config_class = configparser.ConfigParser wonderful_spam = ("I'm having spam spam spam spam " "spam spam spam beaked beans spam " @@ -442,7 +486,7 @@ class MultilineValuesTestCase(TestCaseBase): self.assertEqual(cf_from_file.get('section8', 'lovely_spam4'), self.wonderful_spam.replace('\t\n', '\n')) -class RawConfigParserTestCase(TestCaseBase): +class RawConfigParserTestCase(BasicTestCase): config_class = configparser.RawConfigParser def test_interpolation(self): @@ -476,6 +520,28 @@ class RawConfigParserTestCase(TestCaseBase): [0, 1, 1, 2, 3, 5, 8, 13]) self.assertEqual(cf.get('non-string', 'dict'), {'pi': 3.14159}) +class RawConfigParserTestCaseNonStandardDelimiters(RawConfigParserTestCase): + delimiters = (':=', '$') + comment_prefixes = ('//', '"') + +class RawConfigParserTestSambaConf(BasicTestCase): + config_class = configparser.RawConfigParser + comment_prefixes = ('#', ';', '//', '----') + empty_lines_in_values = False + + def test_reading(self): + smbconf = support.findfile("cfgparser.2") + # check when we pass a mix of readable and non-readable files: + cf = self.newconfig() + parsed_files = cf.read([smbconf, "nonexistent-file"]) + self.assertEqual(parsed_files, 
[smbconf]) + sections = ['global', 'homes', 'printers', + 'print$', 'pdf-generator', 'tmp', 'Agustin'] + self.assertEqual(cf.sections(), sections) + self.assertEqual(cf.get("global", "workgroup"), "MDKGROUP") + self.assertEqual(cf.getint("global", "max log size"), 50) + self.assertEqual(cf.get("global", "hosts allow"), "127.") + self.assertEqual(cf.get("tmp", "echo command"), "cat %s; rm %s") class SafeConfigParserTestCase(ConfigParserTestCase): config_class = configparser.SafeConfigParser @@ -483,16 +549,17 @@ class SafeConfigParserTestCase(ConfigParserTestCase): def test_safe_interpolation(self): # See http://www.python.org/sf/511737 cf = self.fromstring("[section]\n" - "option1=xxx\n" - "option2=%(option1)s/xxx\n" - "ok=%(option1)s/%%s\n" - "not_ok=%(option2)s/%%s") + "option1{eq}xxx\n" + "option2{eq}%(option1)s/xxx\n" + "ok{eq}%(option1)s/%%s\n" + "not_ok{eq}%(option2)s/%%s".format( + eq=self.delimiters[0])) self.assertEqual(cf.get("section", "ok"), "xxx/%s") self.assertEqual(cf.get("section", "not_ok"), "xxx/xxx/%s") def test_set_malformatted_interpolation(self): cf = self.fromstring("[sect]\n" - "option1=foo\n") + "option1{eq}foo\n".format(eq=self.delimiters[0])) self.assertEqual(cf.get('sect', "option1"), "foo") @@ -508,7 +575,7 @@ class SafeConfigParserTestCase(ConfigParserTestCase): def test_set_nonstring_types(self): cf = self.fromstring("[sect]\n" - "option1=foo\n") + "option1{eq}foo\n".format(eq=self.delimiters[0])) # Check that we get a TypeError when setting non-string values # in an existing section: self.assertRaises(TypeError, cf.set, "sect", "option1", 1) @@ -526,15 +593,16 @@ class SafeConfigParserTestCase(ConfigParserTestCase): cf = self.newconfig() self.assertRaises(ValueError, cf.add_section, "DEFAULT") +class SafeConfigParserTestCaseNonStandardDelimiters(SafeConfigParserTestCase): + delimiters = (':=', '$') + comment_prefixes = ('//', '"') class SafeConfigParserTestCaseNoValue(SafeConfigParserTestCase): allow_no_value = True class 
SortedTestCase(RawConfigParserTestCase): - def newconfig(self, defaults=None): - self.cf = self.config_class(defaults=defaults, dict_type=SortedDict) - return self.cf + dict_type = SortedDict def test_sorted(self): self.fromstring("[b]\n" @@ -556,14 +624,36 @@ class SortedTestCase(RawConfigParserTestCase): "o4 = 1\n\n") +class CompatibleTestCase(CfgParserTestCaseClass): + config_class = configparser.RawConfigParser + comment_prefixes = configparser.RawConfigParser._COMPATIBLE + + def test_comment_handling(self): + config_string = textwrap.dedent("""\ + [Commented Bar] + baz=qwe ; a comment + foo: bar # not a comment! + # but this is a comment + ; another comment + """) + cf = self.fromstring(config_string) + self.assertEqual(cf.get('Commented Bar', 'foo'), 'bar # not a comment!') + self.assertEqual(cf.get('Commented Bar', 'baz'), 'qwe') + + def test_main(): support.run_unittest( ConfigParserTestCase, + ConfigParserTestCaseNonStandardDelimiters, MultilineValuesTestCase, RawConfigParserTestCase, + RawConfigParserTestCaseNonStandardDelimiters, + RawConfigParserTestSambaConf, SafeConfigParserTestCase, + SafeConfigParserTestCaseNonStandardDelimiters, SafeConfigParserTestCaseNoValue, SortedTestCase, + CompatibleTestCase, ) @@ -475,6 +475,9 @@ C-API Library ------- +- Issue #1682942: Improvements to configparser: support alternate + delimiters, alternate comment prefixes and empty lines in values. + - Issue #9354: Provide getsockopt() in asyncore's file_wrapper. - Issue #8966: ctypes: Remove implicit bytes-unicode conversion. |