From 8dcaa7396fd89ec84a29ae90c7958d0618ee6c62 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Thu, 29 Jul 2010 12:17:40 +0000 Subject: #9411: allow selecting an encoding for configparser files. Also adds a new test config file to test special cases. --- Doc/library/configparser.rst | 28 ++++++++++-------- Lib/configparser.py | 6 ++-- Lib/test/cfgparser.3 | 69 ++++++++++++++++++++++++++++++++++++++++++++ Lib/test/test_cfgparser.py | 46 ++++++++++++++++++++++++++++- Misc/NEWS | 3 ++ 5 files changed, 136 insertions(+), 16 deletions(-) create mode 100644 Lib/test/cfgparser.3 diff --git a/Doc/library/configparser.rst b/Doc/library/configparser.rst index 792784b..f77a5b8 100644 --- a/Doc/library/configparser.rst +++ b/Doc/library/configparser.rst @@ -286,25 +286,29 @@ RawConfigParser Objects :const:`True`; otherwise return :const:`False`. -.. method:: RawConfigParser.read(filenames) +.. method:: RawConfigParser.read(filenames, encoding=None) Attempt to read and parse a list of filenames, returning a list of filenames - which were successfully parsed. If *filenames* is a string, - it is treated as a single filename. If a file named in *filenames* cannot be - opened, that file will be ignored. This is designed so that you can specify a - list of potential configuration file locations (for example, the current - directory, the user's home directory, and some system-wide directory), and all - existing configuration files in the list will be read. If none of the named - files exist, the :class:`ConfigParser` instance will contain an empty dataset. - An application which requires initial values to be loaded from a file should - load the required file or files using :meth:`readfp` before calling :meth:`read` - for any optional files:: + which were successfully parsed. If *filenames* is a string, it is treated as + a single filename. If a file named in *filenames* cannot be opened, that + file will be ignored. This is designed so that you can specify a list of + potential configuration file locations (for example, the current directory, + the user's home directory, and some system-wide directory), and all existing + configuration files in the list will be read. If none of the named files + exist, the :class:`ConfigParser` instance will contain an empty dataset. An + application which requires initial values to be loaded from a file should + load the required file or files using :meth:`readfp` before calling + :meth:`read` for any optional files:: import configparser, os config = configparser.ConfigParser() config.readfp(open('defaults.cfg')) - config.read(['site.cfg', os.path.expanduser('~/.myapp.cfg')]) + config.read(['site.cfg', os.path.expanduser('~/.myapp.cfg')], encoding='cp1250') + + .. versionadded:: 3.2 + The *encoding* parameter. Previously, all files were read using the + default encoding for :func:`open`. .. method:: RawConfigParser.readfp(fp, filename=None) diff --git a/Lib/configparser.py b/Lib/configparser.py index 8c0546a..7ad24d8 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -61,7 +61,7 @@ ConfigParser -- responsible for parsing a list of options(section) Return list of configuration options for the named section. - read(filenames) + read(filenames, encoding=None) Read and parse the list of named configuration files, given by name. A single filename is also allowed. Non-existing files are ignored. Return list of successfully read files. @@ -369,7 +369,7 @@ class RawConfigParser: del opts['__name__'] return list(opts.keys()) - def read(self, filenames): + def read(self, filenames, encoding=None): """Read and parse a filename or a list of filenames. Files that cannot be opened are silently ignored; this is @@ -386,7 +386,7 @@ class RawConfigParser: read_ok = [] for filename in filenames: try: - fp = open(filename) + fp = open(filename, encoding=encoding) except IOError: continue self._read(fp, filename) diff --git a/Lib/test/cfgparser.3 b/Lib/test/cfgparser.3 new file mode 100644 index 0000000..c182cd7 --- /dev/null +++ b/Lib/test/cfgparser.3 @@ -0,0 +1,69 @@ + # INI with as many tricky parts as possible + # Most of them could not be used before 3.2 + + # This will be parsed with the following options + # delimiters = {'='} + # comment_prefixes = {'#'} + # allow_no_value = True + +[DEFAULT] +go = %(interpolate)s + +[strange] + values = that are indented # and end with hash comments + other = that do continue + in # and still have + other # comments mixed + lines # with the values + + + + + +[corruption] + value = that is + + + actually still here + + + and holds all these weird newlines + + + # but not for the lines that are comments + nor the indentation + + another value = # empty string + yet another # None! + + [yeah, sections can be indented as well] + and that does not mean = anything + are they subsections = False + if you want subsections = use XML + lets use some Unicode = 片仮名 + + [another one!] + even if values are indented like this = seriously +yes, this still applies to = section "another one!" +this too = are there people with configurations broken as this? + beware, this is going to be a continuation + of the value for + key "this too" + even if it has a = character + this is still the continuation + your editor probably highlights it wrong + but that's life +# let's set this value so there is no error +# when getting all items for this section: +interpolate = anything will do + +[no values here] +# but there's this `go` in DEFAULT + + [tricky interpolation] + interpolate = do this + lets = %(go)s + + [more interpolation] + interpolate = go shopping + lets = %(go)s diff --git a/Lib/test/test_cfgparser.py b/Lib/test/test_cfgparser.py index e00a51a..5a77bfd 100644 --- a/Lib/test/test_cfgparser.py +++ b/Lib/test/test_cfgparser.py @@ -533,7 +533,7 @@ class RawConfigParserTestSambaConf(BasicTestCase): smbconf = support.findfile("cfgparser.2") # check when we pass a mix of readable and non-readable files: cf = self.newconfig() - parsed_files = cf.read([smbconf, "nonexistent-file"]) + parsed_files = cf.read([smbconf, "nonexistent-file"], encoding='utf-8') self.assertEqual(parsed_files, [smbconf]) sections = ['global', 'homes', 'printers', 'print$', 'pdf-generator', 'tmp', 'Agustin'] @@ -600,6 +600,46 @@ class SafeConfigParserTestCaseNonStandardDelimiters(SafeConfigParserTestCase): class SafeConfigParserTestCaseNoValue(SafeConfigParserTestCase): allow_no_value = True +class SafeConfigParserTestCaseTrickyFile(CfgParserTestCaseClass): + config_class = configparser.SafeConfigParser + delimiters = {'='} + comment_prefixes = {'#'} + allow_no_value = True + + def test_cfgparser_dot_3(self): + tricky = support.findfile("cfgparser.3") + cf = self.newconfig() + self.assertEqual(len(cf.read(tricky, encoding='utf-8')), 1) + self.assertEqual(cf.sections(), ['strange', + 'corruption', + 'yeah, sections can be ' + 'indented as well', + 'another one!', + 'no values here', + 'tricky interpolation', + 'more interpolation']) + #self.assertEqual(cf.getint('DEFAULT', 'go', vars={'interpolate': '-1'}), + # -1) + self.assertEqual(len(cf.get('strange', 'other').split('\n')), 4) + self.assertEqual(len(cf.get('corruption', 'value').split('\n')), 10) + longname = 'yeah, sections can be indented as well' + self.assertFalse(cf.getboolean(longname, 'are they subsections')) + self.assertEquals(cf.get(longname, 'lets use some Unicode'), + '片仮名') + self.assertEqual(len(cf.items('another one!')), 5) # 4 in section and + # `go` from DEFAULT + with self.assertRaises(configparser.InterpolationMissingOptionError): + cf.items('no values here') + self.assertEqual(cf.get('tricky interpolation', 'lets'), 'do this') + self.assertEqual(cf.get('tricky interpolation', 'lets'), + cf.get('tricky interpolation', 'go')) + self.assertEqual(cf.get('more interpolation', 'lets'), 'go shopping') + + def test_unicode_failure(self): + tricky = support.findfile("cfgparser.3") + cf = self.newconfig() + with self.assertRaises(UnicodeDecodeError): + cf.read(tricky, encoding='ascii') class SortedTestCase(RawConfigParserTestCase): dict_type = SortedDict @@ -635,10 +675,13 @@ class CompatibleTestCase(CfgParserTestCaseClass): foo: bar # not a comment! # but this is a comment ; another comment + quirk: this;is not a comment + ; a space must precede a comment character """) cf = self.fromstring(config_string) self.assertEqual(cf.get('Commented Bar', 'foo'), 'bar # not a comment!') self.assertEqual(cf.get('Commented Bar', 'baz'), 'qwe') + self.assertEqual(cf.get('Commented Bar', 'quirk'), 'this;is not a comment') def test_main(): @@ -652,6 +695,7 @@ def test_main(): SafeConfigParserTestCase, SafeConfigParserTestCaseNonStandardDelimiters, SafeConfigParserTestCaseNoValue, + SafeConfigParserTestCaseTrickyFile, SortedTestCase, CompatibleTestCase, ) diff --git a/Misc/NEWS b/Misc/NEWS index 5850f08..25b4798 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -475,6 +475,9 @@ C-API Library ------- +- Issue #9411: Allow specifying an encoding for config files in the + configparser module. + - Issue #1682942: Improvements to configparser: support alternate delimiters, alternate comment prefixes and empty lines in values. -- cgit v0.12