diff options
author | Skip Montanaro <skip@pobox.com> | 2009-09-28 02:12:27 (GMT) |
---|---|---|
committer | Skip Montanaro <skip@pobox.com> | 2009-09-28 02:12:27 (GMT) |
commit | b4fd4d37a1b01620cd1bf66d61dcd481b13db411 (patch) | |
tree | c0f50b69aca5e3764e859522142799c923cc36ab /Lib/csv.py | |
parent | 17565e5b7be31a85165e4fa8b2324e27831fd742 (diff) | |
download | cpython-b4fd4d37a1b01620cd1bf66d61dcd481b13db411.zip cpython-b4fd4d37a1b01620cd1bf66d61dcd481b13db411.tar.gz cpython-b4fd4d37a1b01620cd1bf66d61dcd481b13db411.tar.bz2 |
Patch from Thomas Barr so that csv.Sniffer will set doublequote property.
Closes issue 6606.
Diffstat (limited to 'Lib/csv.py')
-rw-r--r-- | Lib/csv.py | 22 |
1 files changed, 17 insertions, 5 deletions
@@ -170,7 +170,7 @@ class Sniffer:
         Returns a dialect (or None) corresponding to the sample
         """
 
-        quotechar, delimiter, skipinitialspace = \
+        quotechar, doublequote, delimiter, skipinitialspace = \
                    self._guess_quote_and_delimiter(sample, delimiters)
         if not delimiter:
             delimiter, skipinitialspace = self._guess_delimiter(sample,
@@ -184,8 +184,8 @@ class Sniffer:
             lineterminator = '\r\n'
             quoting = QUOTE_MINIMAL
             # escapechar = ''
-            doublequote = False
 
+        dialect.doublequote = doublequote
         dialect.delimiter = delimiter
         # _csv.reader won't accept a quotechar of ''
         dialect.quotechar = quotechar or '"'
@@ -217,8 +217,8 @@ class Sniffer:
                 break
 
         if not matches:
-            return ('', None, 0) # (quotechar, delimiter, skipinitialspace)
-
+            # (quotechar, doublequote, delimiter, skipinitialspace)
+            return ('', False, None, 0)
         quotes = {}
         delims = {}
         spaces = 0
@@ -255,7 +255,19 @@ class Sniffer:
             delim = ''
             skipinitialspace = 0
 
-        return (quotechar, delim, skipinitialspace)
+        # if we see an extra quote between delimiters, we've got a
+        # double quoted format
+        dq_regexp = re.compile(r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
+                               {'delim':delim, 'quote':quotechar}, re.MULTILINE)
+
+
+
+        if dq_regexp.search(data):
+            doublequote = True
+        else:
+            doublequote = False
+
+        return (quotechar, doublequote, delim, skipinitialspace)
 
 
     def _guess_delimiter(self, data, delimiters): |